In [1]:
import pandas as pd
import numpy as np
import sklearn.preprocessing as skl
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

import wrangle

In [2]:
df = wrangle.wrangle_telco()
df

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges
0,0013-SMEOE,109.70,71,7904.25
1,0014-BMAQU,84.65,63,5377.80
2,0016-QLJIS,90.45,65,5957.90
3,0017-DINOC,45.20,54,2460.55
4,0017-IUDMW,116.80,72,8456.75
...,...,...,...,...
1690,9964-WBQDJ,24.40,71,1725.40
1691,9972-EWRJS,19.25,67,1372.90
1692,9975-GPKZU,19.75,46,856.50
1693,9993-LHIEB,67.85,67,4627.65


In [3]:
def split_my_data(X, y, train_pct):
    X_train, X_test, y_train, y_test = (
        train_test_split(X, y, train_size = train_pct,
                             random_state=42))
    return X_train, X_test, y_train, y_test

In [4]:
X = df[['monthly_charges', 'tenure']]
y = df['total_charges']
train_pct = .8

X_train, X_test, y_train, y_test = split_my_data(X, y, train_pct)
X_train

Unnamed: 0,monthly_charges,tenure
700,66.25,52
1341,90.95,66
629,93.20,71
1034,80.60,70
1447,20.05,54
...,...,...
1130,105.05,67
1294,20.35,54
860,19.85,41
1459,25.40,12


In [5]:
def standard_scaler(train, test):
#     X_train, X_test, y_train, y_test = (
#         split_my_data(X, y, train_pct))
    
    scaler = skl.StandardScaler()
    scaler.fit(train)
    train_scaled = pd.DataFrame(scaler.transform(train), 
                    columns=train.columns, index=train.index)
    
    scaler.fit(test)
    test_scaled = pd.DataFrame(scaler.transform(test), 
                    columns=test.columns, index=test.index)
    
    return scaler, train_scaled, test_scaled

In [6]:
scaler, train_scaled, test_scaled = standard_scaler(X_train, X_test)
train_scaled

Unnamed: 0,monthly_charges,tenure
700,0.180357,-0.253416
1341,0.889548,0.518047
629,0.954151,0.793570
1034,0.592377,0.738465
1447,-1.146148,-0.143207
...,...,...
1130,1.294391,0.573152
1294,-1.137534,-0.143207
860,-1.151890,-0.859565
1459,-0.992538,-2.457596


In [7]:
def scale_inverse(train, test):
    scaler, train_scaled, test_scaled = standard_scaler(train, test)
    train = pd.DataFrame(scaler.inverse_transform(train_scaled),
        columns = train_scaled.columns, index = train_scaled.index)
    
    test = pd.DataFrame(scaler.inverse_transform(test_scaled),
        columns = test_scaled.columns, index = test_scaled.index)
    
    return scaler, train, test

In [8]:
scaler, train, test = scale_inverse(X_train, X_test)
test

Unnamed: 0,monthly_charges,tenure
931,73.35,71.0
1398,25.00,39.0
1561,90.50,47.0
1006,105.35,69.0
506,25.80,68.0
...,...,...
1170,25.20,72.0
1128,85.95,70.0
350,65.15,71.0
887,61.45,37.0


In [9]:
def uniform_scaler(train, test):
#     X_train, X_test, y_train, y_test = (
#         split_my_data(X, y, train_pct))
    
    scaler = skl.QuantileTransformer(n_quantiles=100,
                                output_distribution='uniform')
    scaler.fit(train)
    train_scaled = pd.DataFrame(scaler.transform(train), 
                    columns=train.columns, index=train.index)
    
    scaler.fit(test)
    test_scaled = pd.DataFrame(scaler.transform(test), 
                    columns=test.columns, index=test.index)
    
    return scaler, train_scaled, test_scaled

In [10]:
scaler, train_scaled, test_scaled = uniform_scaler(X_train, X_test)
test_scaled

Unnamed: 0,monthly_charges,tenure
931,0.538083,0.722222
1398,0.263677,0.163207
1561,0.750593,0.232323
1006,0.858586,0.621212
506,0.300583,0.585859
...,...,...
1170,0.279056,1.000000
1128,0.676768,0.656566
350,0.464023,0.722222
887,0.423110,0.150547


In [11]:
def gaussian_scaler(train, test):
#     X_train, X_test, y_train, y_test = (
#         split_my_data(X, y, train_pct))
    
    scaler = skl.PowerTransformer(method='yeo-johnson')
    scaler.fit(train)
    
    train_scaled = pd.DataFrame(scaler.transform(train), 
                    columns=train.columns, index=train.index)
    
    scaler.fit(test)
    test_scaled = pd.DataFrame(scaler.transform(test), 
                    columns=test.columns, index=test.index)
    
    return scaler, train_scaled, test_scaled

In [12]:
scaler, train_scaled, test_scaled = gaussian_scaler(X_train, X_test)
test_scaled

Unnamed: 0,monthly_charges,tenure
931,0.353813,0.864757
1398,-1.158299,-1.282604
1561,0.800019,-0.883325
1006,1.164347,0.686353
506,-1.127929,0.599435
...,...,...
1170,-1.150678,0.956251
1128,0.684556,0.774792
350,0.128556,0.864757
887,0.023916,-1.368794


In [13]:
def min_max_scaler(train, test):
#     X_train, X_test, y_train, y_test = (
#         split_my_data(X, y, train_pct))
    
    scaler = skl.MinMaxScaler()
    scaler.fit(train)
    
    train_scaled = pd.DataFrame(scaler.transform(train), 
                    columns=train.columns, index=train.index)
    
    scaler.fit(test)
    test_scaled = pd.DataFrame(scaler.transform(test), 
                    columns=test.columns, index=test.index)
    
    return scaler, train_scaled, test_scaled

In [14]:
scaler, train_scaled, test_scaled = min_max_scaler(X_train, X_test)
test_scaled

Unnamed: 0,monthly_charges,tenure
931,0.556801,0.986111
1398,0.064187,0.541667
1561,0.731533,0.652778
1006,0.882832,0.958333
506,0.072338,0.944444
...,...,...
1170,0.066225,1.000000
1128,0.685176,0.972222
350,0.473255,0.986111
887,0.435558,0.513889


In [15]:
def iqr_robust_scaler(train, test):
#     X_train, X_test, y_train, y_test = (
#         split_my_data(X, y, train_pct))
    
    scaler = skl.RobustScaler(quantile_range=(25.0,75.0))
    scaler.fit(train)
    
    train_scaled = pd.DataFrame(scaler.transform(train), 
                    columns=train.columns, index=train.index)
    
    scaler.fit(test)
    test_scaled = pd.DataFrame(scaler.transform(test), 
                    columns=test.columns, index=test.index)
    
    return scaler, train_scaled, test_scaled

In [16]:
scaler,train_scaled,test_scaled = iqr_robust_scaler(X_train,X_test)
test_scaled

Unnamed: 0,monthly_charges,tenure
931,0.054774,0.279070
1398,-0.680867,-1.209302
1561,0.315709,-0.837209
1006,0.541651,0.186047
506,-0.668695,0.139535
...,...,...
1170,-0.677824,0.325581
1128,0.246482,0.232558
350,-0.069989,0.279070
887,-0.126284,-1.302326
