## 0 - Import Libraries and Data

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('../C_Datasets/titanic_data_train.csv').iloc[:, -1:-4:-1]
df.head()

Unnamed: 0,Survived,Fare,Ticket
0,0,7.25,21171
1,1,71.2833,17599
2,1,7.925,3101282
3,1,53.1,113803
4,0,8.05,373450


## 1 - Standardization

📌 Scaling features to have a mean of 0 and a standard deviation of 1.

### 1.1 - Implementation in Pandas (Z-Score)

In [3]:
def z_score_function(DataFrame, column):
    return (DataFrame[column] - DataFrame[column].mean()) / DataFrame[column].std()

In [4]:
df_1 = df.copy()
df_1['Fare'] = z_score_function(df_1, 'Fare')
df_1['Ticket'] = z_score_function(df_1, 'Ticket')

In [5]:
df_1.head()

Unnamed: 0,Survived,Fare,Ticket
0,0,-0.502163,-0.422866
1,1,0.786404,-0.428255
2,1,-0.48858,4.224025
3,1,0.420494,-0.283114
4,0,-0.486064,0.108609


### 1.2 - Using scipy.stats() (Z-Score)

In [6]:
from scipy.stats import zscore

In [7]:
df_2 = df.copy()
df_2['Ticket'] = zscore(df_2['Ticket'])
df_2['Fare'] = zscore(df_2['Fare'])

In [8]:
df_2.head()

Unnamed: 0,Survived,Fare,Ticket
0,0,-0.502445,-0.423103
1,1,0.786845,-0.428495
2,1,-0.488854,4.226398
3,1,0.42073,-0.283273
4,0,-0.486337,0.10867


### 1.3 - Using sci-kit learn (StandardScaler)

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
df_3 = df.copy()

In [11]:
scaler = StandardScaler()

In [12]:
df_3_scaled = scaler.fit_transform(df_3[['Fare', 'Ticket']])

In [13]:
df_3[['Fare', 'Ticket']] = pd.DataFrame(df_3_scaled, columns=['Fare', 'Ticket'])

In [14]:
df_3.head()

Unnamed: 0,Survived,Fare,Ticket
0,0,-0.502445,-0.423103
1,1,0.786845,-0.428495
2,1,-0.488854,4.226398
3,1,0.42073,-0.283273
4,0,-0.486337,0.10867


## 2 - Normalization

📌 Scaling features to a fixed range, usually between 0 and 1.

### 2.1 - Using  preprocessing.normalize()

In [15]:
from sklearn.preprocessing import normalize

In [16]:
df_4 = df.copy()

In [17]:
df_4_normalized = normalize(df_4[['Fare', 'Ticket']])

In [18]:
df_4[['Fare', 'Ticket']] = pd.DataFrame(df_4_normalized, columns=['Fare', 'Ticket'])

In [19]:
df_4.head()

Unnamed: 0,Survived,Fare,Ticket
0,0,0.000342,1.0
1,1,0.00405,0.999992
2,1,3e-06,1.0
3,1,0.000467,1.0
4,0,2.2e-05,1.0


### 2.2 - Using preprocessing.MinMaxScaler()

In [20]:
from sklearn.preprocessing import MinMaxScaler

In [21]:
scaler = MinMaxScaler()

In [22]:
df_5 = df.copy()
df_5_scaler = scaler.fit_transform(df_5[['Fare', 'Ticket']])

In [23]:
df_5[['Fare', 'Ticket']] = pd.DataFrame(df_5_scaler, columns=['Fare', 'Ticket'])

In [24]:
df_5.head()

Unnamed: 0,Survived,Fare,Ticket
0,0,0.014151,0.006825
1,1,0.139136,0.005674
2,1,0.015469,0.999989
3,1,0.103644,0.036694
4,0,0.015713,0.120416


## 3 - Robust Scaler

📌 Robust Scaler are robust to outlier.

X_scaled = (X - X.median) / IQR

In [25]:
from sklearn.preprocessing import RobustScaler

In [26]:
robust_scaler = RobustScaler()

In [27]:
df_6 = df.copy()
df_6_scaler = robust_scaler.fit_transform(df_6[['Fare', 'Ticket']])

In [28]:
df_6[['Fare', 'Ticket']] = pd.DataFrame(df_6_scaler, columns=['Fare', 'Ticket'])

In [30]:
df_6

Unnamed: 0,Survived,Fare,Ticket
0,0,-0.312011,-0.276509
1,1,2.461242,-0.287261
2,1,-0.282777,8.995265
3,1,1.673732,0.002333
4,0,-0.277363,0.783924
...,...,...,...
886,0,-0.062981,0.296530
887,1,0.673281,-0.002935
888,0,0.389604,-0.320349
889,1,0.673281,-0.004994
