In [1]:
import numpy as np
import pandas as pd
import warnings as wr
# Suppress every warning for the rest of the session. This keeps the output
# clean, but it also hides deprecation warnings from pandas and scikit-learn.
wr.filterwarnings('ignore')

In [2]:
df = pd.read_csv("supershops.csv") # load the CSV dataset (path is relative to the notebook's working directory)

In [3]:
df.head(10) # preview the first 10 rows of the dataset

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94
5,131876.9,99814.71,362861.36,Dhaka,156991.12
6,134615.46,147198.87,127716.82,Ctg,156122.51
7,130298.13,145530.06,323876.68,Rangpur,155752.6
8,120542.52,148718.95,311613.29,Dhaka,152211.77
9,123334.88,108679.17,304981.62,Ctg,149759.96


In [4]:
df.info() # column dtypes and non-null counts; use it to spot missing values

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Marketing Spend  50 non-null     float64
 1   Administration   50 non-null     float64
 2   Transport        49 non-null     float64
 3   Area             50 non-null     object 
 4   Profit           50 non-null     float64
dtypes: float64(4), object(1)
memory usage: 2.1+ KB


In [5]:
# Feature matrix: every column of df except the target column 'Profit'.
# drop() returns a new DataFrame, so df itself keeps its 'Profit' column.
x = df.drop(columns='Profit')

In [6]:
x.head() # first 5 rows of the features; the 'Profit' column is gone

Unnamed: 0,Marketing Spend,Administration,Transport,Area
0,114523.61,136897.8,471784.1,Dhaka
1,162597.7,151377.59,443898.53,Ctg
2,153441.51,101145.55,407934.54,Rangpur
3,144372.41,118671.85,383199.62,Dhaka
4,142107.34,91391.77,366168.42,Rangpur


In [7]:
# Target: the 'Profit' column, kept as a one-column DataFrame
# (a list of labels is used so the result is not a Series).
y = df.loc[:, ['Profit']]

In [8]:
y.head() # first 5 rows of the 'Profit' target

Unnamed: 0,Profit
0,192261.83
1,191792.06
2,191050.39
3,182901.99
4,166187.94


# Standard Scaler

In [9]:
from sklearn.preprocessing import StandardScaler
# import Standard Scaler library from Sci-kit learn

In [10]:
std = StandardScaler() # create an (unfitted) StandardScaler object

In [11]:
df_ad = std.fit(df[['Administration']]) # fit() only learns the scaling statistics of 'Administration'; it returns the scaler itself, not scaled data

In [12]:
df_ad # df_ad is the fitted StandardScaler object, not a column of scaled values.
# To get scaled data, call transform() and assign the result back to the column.

StandardScaler()

In [13]:
x.head()  # x still holds the original feature values (fit() changed no data)

Unnamed: 0,Marketing Spend,Administration,Transport,Area
0,114523.61,136897.8,471784.1,Dhaka
1,162597.7,151377.59,443898.53,Ctg
2,153441.51,101145.55,407934.54,Rangpur
3,144372.41,118671.85,383199.62,Dhaka
4,142107.34,91391.77,366168.42,Rangpur


In [14]:
# Apply the statistics learned by the earlier fit() call and overwrite the
# 'Administration' column of df with its standardized values.
df['Administration'] = std.transform(df[['Administration']])

In [15]:
df.head() # 'Administration' now shows the standardized values

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,0.560753,471784.1,Dhaka,192261.83
1,162597.7,1.082807,443898.53,Ctg,191792.06
2,153441.51,-0.728257,407934.54,Rangpur,191050.39
3,144372.41,-0.096365,383199.62,Dhaka,182901.99
4,142107.34,-1.079919,366168.42,Rangpur,166187.94


In [16]:
# Refit the same scaler on 'Transport' and standardize it in one step:
# fit_transform() performs fit() followed by transform().
# The scaler now holds the statistics of 'Transport', not 'Administration'.
df['Transport'] = std.fit_transform(df[['Transport']])

In [17]:
df.head(10)  # first 10 rows; 'Administration' and 'Transport' are both standardized now

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,0.560753,2.165287,Dhaka,192261.83
1,162597.7,1.082807,1.929843,Ctg,191792.06
2,153441.51,-0.728257,1.626191,Rangpur,191050.39
3,144372.41,-0.096365,1.417348,Dhaka,182901.99
4,142107.34,-1.079919,1.27355,Rangpur,166187.94
5,131876.9,-0.776239,1.245627,Dhaka,156991.12
6,134615.46,0.932147,-0.739753,Ctg,156122.51
7,130298.13,0.87198,0.91647,Rangpur,155752.6
8,120542.52,0.986952,0.812928,Dhaka,152211.77
9,123334.88,-0.45664,0.756935,Ctg,149759.96


# using LOOP in Standard Scaler


In [18]:
#Standard Scaler by using LOOP

In [27]:
x.columns  # list the feature column names that the loop below iterates over

Index(['Marketing Spend', 'Administration', 'Transport', 'Area'], dtype='object')

In [30]:
# Standardize every numeric feature column of x, one column at a time.
# Columns are chosen with select_dtypes(include='number') rather than
# `dtype != object`: string, category and datetime columns are not always
# stored as object dtype, and the scaler cannot process them.
std = StandardScaler()
for col in x.select_dtypes(include='number').columns:
    # The scaler is refitted for each column, so after the loop it only
    # keeps the statistics of the last numeric column.
    x[col] = std.fit_transform(x[[col]])

In [31]:
x.head()  # numeric columns are standardized; 'Area' is left unchanged

Unnamed: 0,Marketing Spend,Administration,Transport,Area
0,0.897913,0.560753,2.165287,Dhaka
1,1.95586,1.082807,1.929843,Ctg
2,1.754364,-0.728257,1.626191,Rangpur
3,1.554784,-0.096365,1.417348,Dhaka
4,1.504937,-1.079919,1.27355,Rangpur


# Normalization

In [32]:
# Normalization using Scikit Learn library

In [46]:
from sklearn.preprocessing import MinMaxScaler

In [47]:
MinMaxScale = MinMaxScaler() # create a MinMaxScaler object

In [48]:
df['Transport'] = MinMaxScale.fit_transform(df[['Transport']]) 
# Fit the scaler on 'Transport' and overwrite the column with the rescaled values.
# NOTE(review): df['Transport'] was already standardized by an earlier cell and
# df has not been reloaded since, so this rescales the standardized values
# rather than the raw ones.

In [49]:
df.head()  # 'Transport' is min-max scaled; 'Administration' is still standardized from before

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,0.560753,1.0,Dhaka,192261.83
1,162597.7,1.082807,0.940893,Ctg,191792.06
2,153441.51,-0.728257,0.864664,Rangpur,191050.39
3,144372.41,-0.096365,0.812235,Dhaka,182901.99
4,142107.34,-1.079919,0.776136,Rangpur,166187.94


# using LOOP in Normalization

In [None]:
# Reload the data, then apply MinMaxScaler to each numeric column with a loop

In [75]:
df = pd.read_csv('supershops.csv') # reload the CSV so df holds the raw, unscaled values again

In [76]:
df.head()  # confirm that the values are back to the raw data

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [77]:
MinMaxScale = MinMaxScaler()  # fresh MinMaxScaler for the loop below

In [78]:
# Features only: remove the target column 'Profit' (drop returns a new frame).
x = df.drop(columns=['Profit'])

In [79]:
# Min-max scale every numeric feature column of x, one column at a time.
# select_dtypes(include='number') is used instead of `dtype != object`
# because string, category and datetime columns are not always object
# dtype and would make the scaler fail.
for col in x.select_dtypes(include='number').columns:
    x[col] = MinMaxScale.fit_transform(x[[col]])

In [80]:
x.head()  # numeric columns are min-max scaled; 'Area' is left unchanged

Unnamed: 0,Marketing Spend,Administration,Transport,Area
0,0.692617,0.651744,1.0,Dhaka
1,0.983359,0.761972,0.940893,Ctg
2,0.927985,0.379579,0.864664,Rangpur
3,0.873136,0.512998,0.812235,Dhaka
4,0.859438,0.305328,0.776136,Rangpur


# Max Abs Scaler

In [57]:
from sklearn.preprocessing import MaxAbsScaler

In [58]:
Max_Abs_Scale = MaxAbsScaler() # create a MaxAbsScaler object

In [60]:
df['Marketing Spend'] = Max_Abs_Scale.fit_transform(df[['Marketing Spend']])
# Fit the scaler on 'Marketing Spend' and overwrite that column of df with the scaled values.

In [61]:
x.head()
# NOTE(review): the previous cell scaled df['Marketing Spend'], but this cell
# displays x, which that assignment does not modify. df.head() was probably
# intended here; the output below is the unchanged x.

Unnamed: 0,Marketing Spend,Administration,Transport,Area
0,0.692617,0.651744,1.0,Dhaka
1,0.983359,0.761972,0.940893,Ctg
2,0.927985,0.379579,0.864664,Rangpur
3,0.873136,0.512998,0.812235,Dhaka
4,0.859438,0.305328,0.776136,Rangpur


# using LOOP in Max Abs Scaler

In [81]:
df = pd.read_csv('supershops.csv') # reload the CSV so df holds the raw, unscaled values again

In [82]:
df.head()  # confirm that the values are back to the raw data

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [83]:
Max_Abs_Scale = MaxAbsScaler()  # fresh MaxAbsScaler for the loop below

In [84]:
# Features only: every column except the target 'Profit'.
x = df.drop(columns='Profit')

In [85]:
# Max-abs scale every numeric feature column of x, one column at a time.
# select_dtypes(include='number') is used instead of `dtype != object`
# because string, category and datetime columns are not always object
# dtype and would make the scaler fail.
for col in x.select_dtypes(include='number').columns:
    x[col] = Max_Abs_Scale.fit_transform(x[[col]])

In [86]:
x.head()  # numeric columns are max-abs scaled; 'Area' is left unchanged

Unnamed: 0,Marketing Spend,Administration,Transport,Area
0,0.692617,0.749527,1.0,Dhaka
1,0.983359,0.828805,0.940893,Ctg
2,0.927985,0.553781,0.864664,Rangpur
3,0.873136,0.649738,0.812235,Dhaka
4,0.859438,0.500378,0.776136,Rangpur


# Robust Scaler

In [69]:
from sklearn.preprocessing import RobustScaler

In [87]:
rs = RobustScaler()  # create a RobustScaler object

In [88]:
# Fit the scaler on 'Marketing Spend' and overwrite that column of df with the scaled values.
df['Marketing Spend'] = rs.fit_transform(df[['Marketing Spend']])

In [89]:
df.head()  # only 'Marketing Spend' is scaled; the other columns keep their raw values

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.67253,136897.8,471784.1,Dhaka,192261.83
1,1.452113,151377.59,443898.53,Ctg,191792.06
2,1.303634,101145.55,407934.54,Rangpur,191050.39
3,1.156567,118671.85,383199.62,Dhaka,182901.99
4,1.119836,91391.77,366168.42,Rangpur,166187.94


# using LOOP in Robust Scaler

In [90]:
df = pd.read_csv('supershops.csv')  # reload the CSV so df holds the raw, unscaled values again

In [91]:
df.head()  # confirm that the values are back to the raw data

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [92]:
# Robust-scale every numeric feature column, one column at a time.
rs = RobustScaler()
# x is df without the target; it is used only to decide WHICH columns to scale.
x = df.drop('Profit', axis=1)
# select_dtypes(include='number') replaces the `dtype != object` test, which
# would also pick up string, category or datetime columns that are not stored
# as object dtype and make the scaler fail.
for column in x.select_dtypes(include='number').columns:
    # The scaled values are written into df (not x), so df is modified in
    # place while 'Profit' and non-numeric columns stay untouched.
    df[column] = rs.fit_transform(df[[column]])

In [93]:
df.head()  # numeric feature columns are robust-scaled; 'Area' and 'Profit' are unchanged

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.67253,0.345355,1.552016,Dhaka,192261.83
1,1.452113,0.697565,1.383714,Ctg,191792.06
2,1.303634,-0.52429,1.166654,Rangpur,191050.39
3,1.156567,-0.097977,1.017368,Dhaka,182901.99
4,1.119836,-0.761543,0.914576,Rangpur,166187.94
