In [1]:
import pandas as pd
df = pd.read_csv('supershop.csv.csv')

In [2]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [3]:
# Keep Copies for future
df1 = df.copy()   
df2 = df.copy()
df3 = df.copy()

# Feature Engineering 

### Normalization

In [4]:
from sklearn.preprocessing import MinMaxScaler   # import libray

In [5]:
minmax = MinMaxScaler()  # object creation , inside () shift+Tab see default feature range (0,1). its is possible to customize the feature range as copies"(feature_range=(5,10))

In [6]:
scaled_1st = minmax.fit(df[['Marketing Spend']]) # trainging, fitting completed, fitting the model by object 

In [7]:
scaled_1st

MinMaxScaler()

In [8]:
df.head()  # no changes in df, because its fitted only, need to transform for scaling

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [9]:
df['Marketing Spend'] = minmax.transform(df[['Marketing Spend']])  # Transform , Transform the model by object

In [10]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.692617,136897.8,471784.1,Dhaka,192261.83
1,0.983359,151377.59,443898.53,Ctg,191792.06
2,0.927985,101145.55,407934.54,Rangpur,191050.39
3,0.873136,118671.85,383199.62,Dhaka,182901.99
4,0.859438,91391.77,366168.42,Rangpur,166187.94


In [11]:
df['Administration'] = minmax.transform(df[['Administration']])

In [12]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.692617,0.827931,471784.1,Dhaka,192261.83
1,0.983359,0.915502,443898.53,Ctg,191792.06
2,0.927985,0.611709,407934.54,Rangpur,191050.39
3,0.873136,0.717704,383199.62,Dhaka,182901.99
4,0.859438,0.55272,366168.42,Rangpur,166187.94


In [13]:
df['Transport'] = minmax.transform(df[['Transport']])

In [14]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.692617,0.827931,2.853259,Dhaka,192261.83
1,0.983359,0.915502,2.684613,Ctg,191792.06
2,0.927985,0.611709,2.467109,Rangpur,191050.39
3,0.873136,0.717704,2.317517,Dhaka,182901.99
4,0.859438,0.55272,2.214516,Rangpur,166187.94


# Standardization

In [15]:
df2.head()   # Get copies original from above, df already scaled.   

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


#### As per standardization formula, new marketing spend scaling

### Manual Method, no library imported

In [16]:
df2['New Marketing Spend'] = (df2['Marketing Spend'] - df2['Marketing Spend'].mean()) / df2['Marketing Spend'].std() # formula

In [17]:
df2.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,New Marketing Spend
0,114523.61,136897.8,471784.1,Dhaka,192261.83,0.888889
1,162597.7,151377.59,443898.53,Ctg,191792.06,1.936203
2,153441.51,101145.55,407934.54,Rangpur,191050.39,1.736731
3,144372.41,118671.85,383199.62,Dhaka,182901.99,1.539157
4,142107.34,91391.77,366168.42,Rangpur,166187.94,1.489812


In [18]:
df2['New Marketing Spend'].std()   # as per formula standard deviation of scaled column must be 1, its verified

1.0

In [19]:
df2['New Marketing Spend'].mean()  # as per formula mean will be ZERO, here see lat 945e-16 e.g ZERO its verified. its scientific form. U can consided this scaled value as Z-Score value also

2.7977620220553945e-16

## Standardization with alternative formula

### Stadardization by Libriary imported, its recommended, see "new marketing spend" & "marketing spend" almost same

In [23]:
from sklearn.preprocessing import StandardScaler   # Libriry import
std = StandardScaler()

In [24]:
df2['Marketing Spend'] = std.fit_transform(df2[['Marketing Spend']])
df2.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,New Marketing Spend
0,0.897913,136897.8,471784.1,Dhaka,192261.83,0.888889
1,1.95586,151377.59,443898.53,Ctg,191792.06,1.936203
2,1.754364,101145.55,407934.54,Rangpur,191050.39,1.736731
3,1.554784,118671.85,383199.62,Dhaka,182901.99,1.539157
4,1.504937,91391.77,366168.42,Rangpur,166187.94,1.489812


In [25]:
df2['Administration'] = std.fit_transform(df2[['Administration']])

In [26]:
df2.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,New Marketing Spend
0,0.897913,0.560753,471784.1,Dhaka,192261.83,0.888889
1,1.95586,1.082807,443898.53,Ctg,191792.06,1.936203
2,1.754364,-0.728257,407934.54,Rangpur,191050.39,1.736731
3,1.554784,-0.096365,383199.62,Dhaka,182901.99,1.539157
4,1.504937,-1.079919,366168.42,Rangpur,166187.94,1.489812


In [27]:
df2['Transport'] = std.fit_transform(df2[['Transport']])
df2.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,New Marketing Spend
0,0.897913,0.560753,2.165287,Dhaka,192261.83,0.888889
1,1.95586,1.082807,1.929843,Ctg,191792.06,1.936203
2,1.754364,-0.728257,1.626191,Rangpur,191050.39,1.736731
3,1.554784,-0.096365,1.417348,Dhaka,182901.99,1.539157
4,1.504937,-1.079919,1.27355,Rangpur,166187.94,1.489812


# Scaling - Max Value

In [28]:
df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [29]:
df3['Marketing Spend'].max()  # Get the max value & always divided this this max

165349.2

In [30]:
df3['Marketing Spend'] = df3['Marketing Spend'] / df3['Marketing Spend'].max()

In [31]:
df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.692617,136897.8,471784.1,Dhaka,192261.83
1,0.983359,151377.59,443898.53,Ctg,191792.06
2,0.927985,101145.55,407934.54,Rangpur,191050.39
3,0.873136,118671.85,383199.62,Dhaka,182901.99
4,0.859438,91391.77,366168.42,Rangpur,166187.94


# Scaling - Max Absolute Scaler

In [39]:
from sklearn.preprocessing import MaxAbsScaler
scaler = MaxAbsScaler()

In [40]:
df3['Administration'] = scaler.fit_transform(df2[['Administration']])

In [41]:
df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Administration\t
0,0.692617,0.221993,471784.1,Dhaka,192261.83,0.749527
1,0.983359,0.428666,443898.53,Ctg,191792.06,0.828805
2,0.927985,-0.288305,407934.54,Rangpur,191050.39,0.553781
3,0.873136,-0.038149,383199.62,Dhaka,182901.99,0.649738
4,0.859438,-0.427523,366168.42,Rangpur,166187.94,0.500378


# Scaling - Robust Scaler

In [44]:
from sklearn.preprocessing import RobustScaler
RoSc = RobustScaler()

In [45]:
df3['Transport'] = RoSc.fit_transform(df2[['Transport']])

In [46]:
df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Administration\t
0,0.692617,0.221993,1.552016,Dhaka,192261.83,0.749527
1,0.983359,0.428666,1.383714,Ctg,191792.06,0.828805
2,0.927985,-0.288305,1.166654,Rangpur,191050.39,0.553781
3,0.873136,-0.038149,1.017368,Dhaka,182901.99,0.649738
4,0.859438,-0.427523,0.914576,Rangpur,166187.94,0.500378
