# Feature Scaling Solution Notebook

### Importing Libraries

In [241]:
import pandas as pd
import numpy as np

### Importing Dataset

In [242]:
# Read the raw dataset from disk and preview its first 15 rows.
df = pd.read_csv(filepath_or_buffer="Dataset_03.csv")
df.head(n=15)

Unnamed: 0,Australia,Canada,Dubai,USA,Salary,YearsExperience,Purchased
0,0,0,1,0,39343,1.1,0
1,0,1,0,0,46205,1.3,1
2,0,1,0,0,37731,1.5,0
3,0,1,0,0,43525,2.0,0
4,0,0,0,1,39891,2.2,0
5,0,0,1,0,56642,2.9,0
6,0,1,0,0,60150,3.0,1
7,1,0,0,0,54445,3.2,0
8,0,0,1,0,64445,3.2,1
9,0,0,1,0,57189,3.7,0


In [243]:
# Features are every column except the last; the target is the last column.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

### Splitting Dataset

In [244]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [245]:
# Display the four split objects together (as a tuple) to inspect the split.
X_train, X_test, y_train, y_test

(    Australia  Canada  Dubai  USA  Salary  YearsExperience
 22          1       0      0    0  101302              7.9
 23          1       0      0    0  113812              8.2
 4           0       0      0    1   39891              2.2
 2           0       1      0    0   37731              1.5
 25          1       0      0    0  105582              9.0
 6           0       1      0    0   60150              3.0
 18          1       0      0    0   81363              5.9
 13          0       1      0    0   57081              4.1
 7           1       0      0    0   54445              3.2
 27          0       0      0    1  112635              9.6
 1           0       1      0    0   46205              1.3
 16          0       0      0    1   66029              5.1
 0           0       0      1    0   39343              1.1
 15          0       0      0    1   67938              4.9
 29          0       1      0    0  121872             10.5
 28          0       0      0    1  1223

### Perform Feature Scaling

1. **Simple Feature Scaling**  

    $X_{\text{scaled}} = \dfrac{X}{\max(X)}$

2. **Standardization**  

    $X_{\text{stand}} = \dfrac{X - \mu}{\sigma}$  
    where $\mu$ is the mean and $\sigma$ is the standard deviation.

* Useful in all types of cases.

3. **Normalization**  

    $X_{\text{norm}} = \dfrac{X - X_{\min}}{X_{\max} - X_{\min}}$

* Normal Distribution

In [246]:
from sklearn.preprocessing import StandardScaler

In [247]:
# Standardize only the continuous columns (position 4 onward: Salary and
# YearsExperience); the one-hot country columns stay as 0/1 indicators.
scaler_partial = StandardScaler()
scale_cols = X_train.columns[4:]

# Cast the columns to float64 before assigning: Salary is stored as int64, and
# writing float z-scores into an int64 column makes pandas emit an
# incompatible-dtype FutureWarning. astype returns a new frame, so X_train
# itself is left unchanged.
X_train_scaled = X_train.astype({col: "float64" for col in scale_cols})

# Fit the scaler on the training data only, then write the scaled values back.
X_train_scaled.iloc[:, 4:] = scaler_partial.fit_transform(X_train_scaled.iloc[:, 4:])
X_train_scaled.head()

  0.32586056 -0.54948565 -0.64451129  1.45319054 -0.94155654 -0.22691758
 -1.18892601 -0.15809969  1.78617683  1.80488635 -0.54559233 -0.28401948
 -0.55395574 -0.59588094 -0.56531124]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  X_train_scaled.iloc[:, 4:] = scaler_partial.fit_transform(X_train_scaled.iloc[:, 4:])


Unnamed: 0,Australia,Canada,Dubai,USA,Salary,YearsExperience
22,1,0,0,0,1.044645,0.971797
23,1,0,0,0,1.49562,1.073328
4,0,0,0,1,-1.169171,-0.957293
2,0,1,0,0,-1.247037,-1.194199
25,1,0,0,0,1.198936,1.344078


In [248]:
# Scale the test set with the scaler already fitted on the training data
# (transform only, no refit), touching the same columns from position 4 onward.
scale_cols = X_test.columns[4:]

# Cast the columns to float64 before assigning: Salary is stored as int64, and
# writing float z-scores into an int64 column makes pandas emit an
# incompatible-dtype FutureWarning. astype returns a new frame, so X_test
# itself is left unchanged.
X_test_scaled = X_test.astype({col: "float64" for col in scale_cols})
X_test_scaled.iloc[:, 4:] = scaler_partial.transform(X_test_scaled.iloc[:, 4:])
X_test_scaled.head()

  1.60942769 -1.03816834  1.33768897]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  X_test_scaled.iloc[:,4:] = scaler_partial.transform(X_test_scaled.iloc[:,4:])


Unnamed: 0,Australia,Canada,Dubai,USA,Salary,YearsExperience
17,0,1,0,0,0.388045,0.091861
21,1,0,0,0,0.935452,0.701048
10,0,0,1,0,-0.328252,-0.38195
19,0,1,0,0,0.779251,0.328767
14,1,0,0,0,-0.404207,-0.178888


### Check the Scaled Results

In [249]:
# Original (unscaled) training features, shown for comparison with the scaled copy.
X_train

Unnamed: 0,Australia,Canada,Dubai,USA,Salary,YearsExperience
22,1,0,0,0,101302,7.9
23,1,0,0,0,113812,8.2
4,0,0,0,1,39891,2.2
2,0,1,0,0,37731,1.5
25,1,0,0,0,105582,9.0
6,0,1,0,0,60150,3.0
18,1,0,0,0,81363,5.9
13,0,1,0,0,57081,4.1
7,1,0,0,0,54445,3.2
27,0,0,0,1,112635,9.6


In [250]:
# Original (unscaled) test features, shown for comparison with the scaled copy.
X_test

Unnamed: 0,Australia,Canada,Dubai,USA,Salary,YearsExperience
17,0,1,0,0,83088,5.3
21,1,0,0,0,98273,7.1
10,0,0,1,0,63218,3.9
19,0,1,0,0,93940,6.0
14,1,0,0,0,61111,4.5
20,0,1,0,0,91738,6.8
26,0,0,1,0,116969,9.5
3,0,1,0,0,43525,2.0
24,0,0,1,0,109431,8.7


In [251]:
# Display the scaled training and test frames together (as a tuple).
X_train_scaled,X_test_scaled

(    Australia  Canada  Dubai  USA    Salary  YearsExperience
 22          1       0      0    0  1.044645         0.971797
 23          1       0      0    0  1.495620         1.073328
 4           0       0      0    1 -1.169171        -0.957293
 2           0       1      0    0 -1.247037        -1.194199
 25          1       0      0    0  1.198936         1.344078
 6           0       1      0    0 -0.438851        -0.686543
 18          1       0      0    0  0.325861         0.294924
 13          0       1      0    0 -0.549486        -0.314263
 7           1       0      0    0 -0.644511        -0.618856
 27          0       0      0    1  1.453191         1.547140
 1           0       1      0    0 -0.941557        -1.261886
 16          0       0      0    1 -0.226918         0.024174
 0           0       0      1    0 -1.188926        -1.329573
 15          0       0      0    1 -0.158100        -0.043513
 29          0       1      0    0  1.786177         1.851733
 28     