# Scaling Assignment

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('supershops.csv')

In [3]:
df2 = df.copy()
df3 = df.copy()
df4 = df.copy()
df5 = df.copy()

In [4]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [5]:
df.isnull().sum()          #to see the NULL or MISSING or NaN(Not a Number) value in DATASET

Marketing Spend    0
Administration     0
Transport          1
Area               0
Profit             0
dtype: int64

In [6]:
df.dropna().head()            #to DROP the NaN Row; here ROW 19 is deleted

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [7]:
df.shape

(50, 5)

In [8]:
df.dropna().shape

(49, 5)

In [9]:
#df = df.dropna()           #this is one way to UPDATE the "df" value
df.dropna(inplace=True)     #this is another way to UPDATE the "df" value

In [10]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


# Scaling through "Normalization"

In [11]:
from sklearn.preprocessing import MinMaxScaler

In [12]:
m = MinMaxScaler()        #Creating an 'OBJECT/MODEL' for 'MinMaxScaler'

In [13]:
df['Marketing Spend'] = m.fit_transform(df[['Marketing Spend']]) # fit() = training the model ; transform() = replace the values

In [14]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.692617,136897.8,471784.1,Dhaka,192261.83
1,0.983359,151377.59,443898.53,Ctg,191792.06
2,0.927985,101145.55,407934.54,Rangpur,191050.39
3,0.873136,118671.85,383199.62,Dhaka,182901.99
4,0.859438,91391.77,366168.42,Rangpur,166187.94


Another way to Normalize a column

In [15]:
m.fit(df[['Administration']])        #just fit "Administration" column

MinMaxScaler()

In [16]:
df.head()              # no changes in "Administration" column as we don't "Transform"

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.692617,136897.8,471784.1,Dhaka,192261.83
1,0.983359,151377.59,443898.53,Ctg,191792.06
2,0.927985,101145.55,407934.54,Rangpur,191050.39
3,0.873136,118671.85,383199.62,Dhaka,182901.99
4,0.859438,91391.77,366168.42,Rangpur,166187.94


In [17]:
df['Administration_Transform'] = m.transform(df[['Administration']])

In [18]:
df['Administration_Transform'].head()

0    0.651744
1    0.761972
2    0.379579
3    0.512998
4    0.305328
Name: Administration_Transform, dtype: float64

In [19]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Administration_Transform
0,0.692617,136897.8,471784.1,Dhaka,192261.83,0.651744
1,0.983359,151377.59,443898.53,Ctg,191792.06,0.761972
2,0.927985,101145.55,407934.54,Rangpur,191050.39,0.379579
3,0.873136,118671.85,383199.62,Dhaka,182901.99,0.512998
4,0.859438,91391.77,366168.42,Rangpur,166187.94,0.305328


Scaling of 'Profit' through 'Normalization'

In [20]:
m.fit(df[['Profit']])

MinMaxScaler()

In [21]:
df['Profit_Transform'] = m.transform(df[['Profit']])

In [22]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Administration_Transform,Profit_Transform
0,0.692617,136897.8,471784.1,Dhaka,192261.83,0.651744,1.0
1,0.983359,151377.59,443898.53,Ctg,191792.06,0.761972,0.997355
2,0.927985,101145.55,407934.54,Rangpur,191050.39,0.379579,0.993178
3,0.873136,118671.85,383199.62,Dhaka,182901.99,0.512998,0.947292
4,0.859438,91391.77,366168.42,Rangpur,166187.94,0.305328,0.853171


# Scaling through "Standardization"

In [23]:
#https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html

In [24]:
df2.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [25]:
df2.shape

(50, 5)

In [26]:
df2.isnull().sum()

Marketing Spend    0
Administration     0
Transport          1
Area               0
Profit             0
dtype: int64

In [27]:
df2.dropna().head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [28]:
df2.dropna().shape

(49, 5)

In [29]:
df2 = df2.dropna()

In [30]:
df2.shape

(49, 5)

Model Creation

In [31]:
from sklearn.preprocessing import StandardScaler

In [32]:
s = StandardScaler()     #Creating an 'OBJECT/MODEL' for 'StandardScaler'

In [33]:
s.fit(df2[['Profit']])

StandardScaler()

In [34]:
df2['Profit_Transform'] = s.transform(df2[['Profit']])

In [35]:
df2.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Profit_Transform
0,114523.61,136897.8,471784.1,Dhaka,192261.83,1.997924
1,162597.7,151377.59,443898.53,Ctg,191792.06,1.986261
2,153441.51,101145.55,407934.54,Rangpur,191050.39,1.967846
3,144372.41,118671.85,383199.62,Dhaka,182901.99,1.765533
4,142107.34,91391.77,366168.42,Rangpur,166187.94,1.350548


# Scaling through "Max Abs Scaler"

In [36]:
#https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MaxAbsScaler.html

In [37]:
df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [38]:
df3.isnull().sum()

Marketing Spend    0
Administration     0
Transport          1
Area               0
Profit             0
dtype: int64

In [39]:
df3.dropna().head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [40]:
df3.dropna().shape

(49, 5)

In [41]:
df3 = df3.dropna()

In [42]:
df3.shape

(49, 5)

Model Creation

In [43]:
from sklearn.preprocessing import MaxAbsScaler

In [44]:
mas = MaxAbsScaler()

In [45]:
mas.fit(df3[['Profit']])

MaxAbsScaler()

In [46]:
df3['Profit_Transform'] = mas.transform(df3[['Profit']])

In [47]:
df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Profit_Transform
0,114523.61,136897.8,471784.1,Dhaka,192261.83,1.0
1,162597.7,151377.59,443898.53,Ctg,191792.06,0.997557
2,153441.51,101145.55,407934.54,Rangpur,191050.39,0.993699
3,144372.41,118671.85,383199.62,Dhaka,182901.99,0.951317
4,142107.34,91391.77,366168.42,Rangpur,166187.94,0.864383


# Scaling through "Robust Scaler"

In [48]:
# video: https://youtu.be/U9N-ELpCpc8
#https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html

In [49]:
df4.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [50]:
df4.isnull().sum()

Marketing Spend    0
Administration     0
Transport          1
Area               0
Profit             0
dtype: int64

In [51]:
df4.dropna().head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [52]:
df4.dropna().shape

(49, 5)

Model Creation

In [53]:
from sklearn.preprocessing import RobustScaler

In [54]:
RoSc = RobustScaler()

In [55]:
RoSc.fit(df4[['Profit']])

RobustScaler()

In [56]:
df4['Profit_Transform'] = RoSc.transform(df4[['Profit']])

In [57]:
df4.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Profit_Transform
0,114523.61,136897.8,471784.1,Dhaka,192261.83,1.69834
1,162597.7,151377.59,443898.53,Ctg,191792.06,1.688874
2,153441.51,101145.55,407934.54,Rangpur,191050.39,1.673929
3,144372.41,118671.85,383199.62,Dhaka,182901.99,1.509736
4,142107.34,91391.77,366168.42,Rangpur,166187.94,1.172943


# Scaling through "Max"

In [58]:
df5.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [59]:
max_val = df5['Profit'].max()

In [60]:
max_val

192261.83

In [61]:
df5['Profit_Transform'] = df5['Profit'] / max_val

In [62]:
df5.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Profit_Transform
0,114523.61,136897.8,471784.1,Dhaka,192261.83,1.0
1,162597.7,151377.59,443898.53,Ctg,191792.06,0.997557
2,153441.51,101145.55,407934.54,Rangpur,191050.39,0.993699
3,144372.41,118671.85,383199.62,Dhaka,182901.99,0.951317
4,142107.34,91391.77,366168.42,Rangpur,166187.94,0.864383
