In [1]:
import pandas as pd
import numpy as np
import warnings as wr
wr.filterwarnings('ignore')

In [2]:
df = pd.read_csv('supershops.csv')
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


# LabelEncoder

In [3]:
df1=df.copy()

In [4]:
df1.Area.unique()

array(['Dhaka', 'Ctg', 'Rangpur'], dtype=object)

In [5]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df1['Area']=le.fit_transform(df1['Area'])
df1.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,1,192261.83
1,162597.7,151377.59,443898.53,0,191792.06
2,153441.51,101145.55,407934.54,2,191050.39
3,144372.41,118671.85,383199.62,1,182901.99
4,142107.34,91391.77,366168.42,2,166187.94


In [6]:
print(le.classes_)

['Ctg' 'Dhaka' 'Rangpur']


# Using Loop to Encode categorical columns 

In [7]:
df2 = df.copy()
df2.columns

Index(['Marketing Spend', 'Administration', 'Transport', 'Area', 'Profit'], dtype='object')

In [8]:
for col in df2.columns:
    if df2[col].dtype == np.number:
        continue
    else:
        df2[col]=le.fit_transform(df2[col])
        
df2.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,1,192261.83
1,162597.7,151377.59,443898.53,0,191792.06
2,153441.51,101145.55,407934.54,2,191050.39
3,144372.41,118671.85,383199.62,1,182901.99
4,142107.34,91391.77,366168.42,2,166187.94


In [9]:
df3 = df.copy()
from pandas.core.dtypes.common import is_numeric_dtype

for c in df3.columns:
    if is_numeric_dtype(df3[c]):
        continue
    df3[c]=le.fit_transform(df3[c])
    

df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,1,192261.83
1,162597.7,151377.59,443898.53,0,191792.06
2,153441.51,101145.55,407934.54,2,191050.39
3,144372.41,118671.85,383199.62,1,182901.99
4,142107.34,91391.77,366168.42,2,166187.94


# OneHotEncoder

In [10]:
df4=df.copy()
dummy = pd.get_dummies(df4['Area'])  #dummy = pd.get_dummies(df4.Area)
dummy.head()

Unnamed: 0,Ctg,Dhaka,Rangpur
0,0,1,0
1,1,0,0
2,0,0,1
3,0,1,0
4,0,0,1


In [11]:
df4.drop('Area',axis=1,inplace=True)
df4.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Profit
0,114523.61,136897.8,471784.1,192261.83
1,162597.7,151377.59,443898.53,191792.06
2,153441.51,101145.55,407934.54,191050.39
3,144372.41,118671.85,383199.62,182901.99
4,142107.34,91391.77,366168.42,166187.94


In [12]:
df5=df.copy()
dummy2 = pd.get_dummies(df.Area,drop_first=True,prefix='city')
dummy2.head()

Unnamed: 0,city_Dhaka,city_Rangpur
0,1,0
1,0,0
2,0,1
3,1,0
4,0,1


In [13]:
df4 = pd.concat([df4,dummy2],axis=1)
df4.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Profit,city_Dhaka,city_Rangpur
0,114523.61,136897.8,471784.1,192261.83,1,0
1,162597.7,151377.59,443898.53,191792.06,0,0
2,153441.51,101145.55,407934.54,191050.39,0,1
3,144372.41,118671.85,383199.62,182901.99,1,0
4,142107.34,91391.77,366168.42,166187.94,0,1


# Using Loop OneHotEncoder

In [14]:
df6=df.copy()

for i in df6.columns:
    if is_numeric_dtype(df6[i]):
        continue
    oneHot = pd.get_dummies(df6[i],drop_first=True,prefix='city')
    df6.drop(df6[[i]],axis=1,inplace=True)
    df6 = pd.concat([df6,oneHot],axis=1)
        
df6.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Profit,city_Dhaka,city_Rangpur
0,114523.61,136897.8,471784.1,192261.83,1,0
1,162597.7,151377.59,443898.53,191792.06,0,0
2,153441.51,101145.55,407934.54,191050.39,0,1
3,144372.41,118671.85,383199.62,182901.99,1,0
4,142107.34,91391.77,366168.42,166187.94,0,1


# OrdinalEncoder

In [17]:
from sklearn.preprocessing import OrdinalEncoder
df7 = df.copy()
cities = df7.Area.unique()
cities

array(['Dhaka', 'Ctg', 'Rangpur'], dtype=object)

In [18]:
oe = OrdinalEncoder(categories=[cities])

In [21]:
oe_encoded = oe.fit_transform(df7[['Area']])
oe_encoded

array([[0.],
       [1.],
       [2.],
       [0.],
       [2.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [2.],
       [1.],
       [2.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [2.],
       [0.],
       [1.],
       [0.],
       [2.],
       [2.],
       [0.],
       [1.],
       [2.],
       [0.],
       [2.],
       [0.],
       [2.],
       [0.],
       [1.],
       [2.],
       [1.],
       [0.],
       [2.],
       [1.],
       [0.],
       [1.],
       [1.],
       [2.],
       [1.],
       [0.],
       [1.],
       [0.],
       [2.],
       [1.],
       [0.],
       [1.]])

In [22]:
df7['Area'] = pd.DataFrame(oe_encoded)
df7.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,0.0,192261.83
1,162597.7,151377.59,443898.53,1.0,191792.06
2,153441.51,101145.55,407934.54,2.0,191050.39
3,144372.41,118671.85,383199.62,0.0,182901.99
4,142107.34,91391.77,366168.42,2.0,166187.94


# Using loop OrdinalEndoder

In [27]:
df8 = df.copy()
cities_2 = df8['Area'].unique()

for j in df8.columns:
    if is_numeric_dtype(df8[j]):
        continue
    categories_oe2 = df8[j].unique()
    oe2 = OrdinalEncoder(categories=[categories_oe2])
    oe2_encoded = oe2.fit_transform(df8[[j]])
    df8[j] = pd.DataFrame(oe2_encoded)
        
df8.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,0.0,192261.83
1,162597.7,151377.59,443898.53,1.0,191792.06
2,153441.51,101145.55,407934.54,2.0,191050.39
3,144372.41,118671.85,383199.62,0.0,182901.99
4,142107.34,91391.77,366168.42,2.0,166187.94


In [35]:
oe.categories_

[array(['Dhaka', 'Ctg', 'Rangpur'], dtype=object)]