In [1]:
import pandas as pd

### Load Data

In [2]:
from google.colab import files
uploaded = files.upload()

Saving auto-mpg.csv to auto-mpg.csv


In [4]:
df = pd.read_csv('auto-mpg.csv')
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [5]:
df.tail()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
393,27.0,4,140.0,86,2790,15.6,82,1,ford mustang gl
394,44.0,4,97.0,52,2130,24.6,82,2,vw pickup
395,32.0,4,135.0,84,2295,11.6,82,1,dodge rampage
396,28.0,4,120.0,79,2625,18.6,82,1,ford ranger
397,31.0,4,119.0,82,2720,19.4,82,1,chevy s-10


In [6]:
# check data type
df.dtypes

mpg             float64
cylinders         int64
displacement    float64
horsepower       object
weight            int64
acceleration    float64
model year        int64
origin            int64
car name         object
dtype: object

- Except Car name & horsepower (object) we can apply robust scalar on any other variables (numeric) only
- For this case I will take displacement & weight

### Variable (X, y)

In [7]:
X = df.iloc[:, [2, 4]]
y = df.iloc[:, 5]

In [8]:
X.head()

Unnamed: 0,displacement,weight
0,307.0,3504
1,350.0,3693
2,318.0,3436
3,304.0,3433
4,302.0,3449


In [9]:
y.head()

0    12.0
1    11.5
2    11.0
3    12.0
4    10.5
Name: acceleration, dtype: float64

- Import Robust Scalar & Train test from sklearn
- Robust scalar will be used to standardize the dataframe

In [11]:
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [13]:
X_train.head()

Unnamed: 0,displacement,weight
245,98.0,1800
110,108.0,2379
16,199.0,2774
66,304.0,3672
153,250.0,3459


- We are going the perform scaler on Xtrain first then apply on X_test as well

In [14]:
# Object creation
scaler = RobustScaler()           # or  scaler = RobustScaler().fit(X_train)
print(scaler)

RobustScaler()


In [17]:
scaler.fit(X_train)

In [18]:
# check attributes, it will return 2, because of two variable Displacment & weight
scaler.n_features_in_

2

In [19]:
# Now Transforme, it will return standarsize value of our dataframe, 1st col fro displacement & 2nd col for weights
scaler.transform(X_train)

array([[-0.34304207, -0.782042  ],
       [-0.27831715, -0.36278059],
       [ 0.31067961, -0.07675597],
       [ 0.99029126,  0.57349747],
       [ 0.6407767 ,  0.4192614 ],
       [ 1.87055016,  1.03692976],
       [-0.34951456, -0.75669804],
       [ 1.28802589,  1.31716148],
       [-0.29773463, -0.49963794],
       [-0.42718447, -0.65532223],
       [-0.51779935, -0.75597393],
       [-0.28478964, -0.30123099],
       [ 0.75080906,  0.5249819 ],
       [-0.38834951, -0.66256336],
       [-0.19417476, -0.15206372],
       [-0.07119741, -0.34178132],
       [-0.35598706, -0.50036206],
       [ 0.4789644 ,  0.34757422],
       [-0.38834951, -0.54308472],
       [ 0.0776699 ,  0.38377987],
       [ 1.28802589,  1.12961622],
       [-0.07119741, -0.34685011],
       [ 0.3171521 , -0.2121651 ],
       [ 0.3171521 ,  0.19913106],
       [ 0.31067961, -0.16799421],
       [ 0.11003236,  0.01448226],
       [-0.38834951, -0.6480811 ],
       [ 0.        ,  0.05068791],
       [-0.34304207,

- This starndardize value can be saved in a variable for future ref

In [20]:
# store standardized value
X_trained_scaled = scaler.transform(X_train)
print(X_trained_scaled)

[[-0.34304207 -0.782042  ]
 [-0.27831715 -0.36278059]
 [ 0.31067961 -0.07675597]
 [ 0.99029126  0.57349747]
 [ 0.6407767   0.4192614 ]
 [ 1.87055016  1.03692976]
 [-0.34951456 -0.75669804]
 [ 1.28802589  1.31716148]
 [-0.29773463 -0.49963794]
 [-0.42718447 -0.65532223]
 [-0.51779935 -0.75597393]
 [-0.28478964 -0.30123099]
 [ 0.75080906  0.5249819 ]
 [-0.38834951 -0.66256336]
 [-0.19417476 -0.15206372]
 [-0.07119741 -0.34178132]
 [-0.35598706 -0.50036206]
 [ 0.4789644   0.34757422]
 [-0.38834951 -0.54308472]
 [ 0.0776699   0.38377987]
 [ 1.28802589  1.12961622]
 [-0.07119741 -0.34685011]
 [ 0.3171521  -0.2121651 ]
 [ 0.3171521   0.19913106]
 [ 0.31067961 -0.16799421]
 [ 0.11003236  0.01448226]
 [-0.38834951 -0.6480811 ]
 [ 0.          0.05068791]
 [-0.34304207 -0.54670529]
 [-0.20064725 -0.17740768]
 [-0.4012945  -0.66039102]
 [-0.44012945 -0.63504707]
 [-0.42718447 -0.55756698]
 [ 0.52427184  0.23968139]
 [-0.2394822  -0.21578566]
 [ 0.6407767   0.34250543]
 [ 0.69255663  0.22664736]
 

- Now applying on X_test data

In [21]:
scaler.fit_transform(X_test)

array([[-0.24317618, -0.68253373],
       [-0.09925558, -0.06034483],
       [ 0.45657568, -0.0535982 ],
       [ 0.88337469,  0.92541229],
       [ 0.88337469,  1.14805097],
       [-0.2133995 , -0.45389805],
       [ 0.        ,  0.11956522],
       [ 1.48883375,  1.52136432],
       [ 0.5955335 ,  1.01536732],
       [ 0.29776675,  0.12706147],
       [ 1.24069479,  0.85794603],
       [-0.24317618, -0.55509745],
       [-0.24317618, -0.5625937 ],
       [ 0.81389578,  0.88943028],
       [ 0.        , -0.12256372],
       [ 1.20595533,  1.68628186],
       [-0.13895782, -0.09782609],
       [ 0.11414392,  0.32571214],
       [ 1.29032258,  1.13530735],
       [-0.24317618, -0.55134933],
       [-0.0942928 , -0.1458021 ],
       [ 0.28784119,  0.09557721],
       [ 0.45657568,  0.09707646],
       [-0.20843672, -0.47263868],
       [ 0.82878412,  0.59857571],
       [-0.2133995 , -0.4314093 ],
       [-0.20843672, -0.3302099 ],
       [ 0.05459057,  0.02586207],
       [ 0.58560794,

In [22]:
X_test_scaled = scaler.fit_transform(X_test)
print(X_test_scaled)

[[-0.24317618 -0.68253373]
 [-0.09925558 -0.06034483]
 [ 0.45657568 -0.0535982 ]
 [ 0.88337469  0.92541229]
 [ 0.88337469  1.14805097]
 [-0.2133995  -0.45389805]
 [ 0.          0.11956522]
 [ 1.48883375  1.52136432]
 [ 0.5955335   1.01536732]
 [ 0.29776675  0.12706147]
 [ 1.24069479  0.85794603]
 [-0.24317618 -0.55509745]
 [-0.24317618 -0.5625937 ]
 [ 0.81389578  0.88943028]
 [ 0.         -0.12256372]
 [ 1.20595533  1.68628186]
 [-0.13895782 -0.09782609]
 [ 0.11414392  0.32571214]
 [ 1.29032258  1.13530735]
 [-0.24317618 -0.55134933]
 [-0.0942928  -0.1458021 ]
 [ 0.28784119  0.09557721]
 [ 0.45657568  0.09707646]
 [-0.20843672 -0.47263868]
 [ 0.82878412  0.59857571]
 [-0.2133995  -0.4314093 ]
 [-0.20843672 -0.3302099 ]
 [ 0.05459057  0.02586207]
 [ 0.58560794  0.52811094]
 [ 1.29032258  1.64955022]
 [ 0.          0.06334333]
 [-0.17369727 -0.54385307]
 [ 1.04218362  0.89542729]
 [-0.13399504 -0.32046477]
 [-0.26799007 -0.51461769]
 [ 1.29032258  1.17878561]
 [ 0.07444169 -0.17503748]
 