In [1]:
from sklearn.neural_network import MLPClassifier as mlpc
from sklearn.preprocessing import StandardScaler,QuantileTransformer
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, roc_auc_score

import mltools as ml

In [2]:
X = np.genfromtxt('data/X_train.txt', delimiter=',')
Y = np.genfromtxt('data/Y_train.txt', delimiter=',')
np.random.seed(0)

X,Y = ml.shuffleData(X,Y)
X = X[:,:41]

In [3]:
Xtr, Xva, Ytr, Yva = ml.splitData(X,Y,0.7)

## Comparing Preprocessing methods

### Standard Scaler

In [4]:
scaler = StandardScaler()
Xtr_scaled = scaler.fit_transform(Xtr, Ytr)
Xva_scaled = scaler.fit_transform(Xva,Yva)

In [5]:
learner = mlpc(max_iter=1000)#no weights first.
learner.fit(Xtr_scaled,Ytr) #the training

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=1000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [6]:
Yva_hat = learner.predict(Xva_scaled)

print(f" auc: {roc_auc_score(Yva, Yva_hat)} | mse: {mean_squared_error(Yva, Yva_hat)}")

 auc: 0.5508985030326493 | mse: 0.44858554108666365


### Quantile Transformer

In [7]:
transformer = QuantileTransformer()
Xtr_transformed = transformer.fit_transform(Xtr)
Xva_transformed = transformer.fit_transform(Xva)

In [8]:
learner = mlpc(max_iter=1000)#no weights first.
learner.fit(Xtr_transformed,Ytr) #the training

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=1000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [9]:
Yva_hat = learner.predict(Xva_transformed)

print(f" auc: {roc_auc_score(Yva, Yva_hat)} | mse: {mean_squared_error(Yva, Yva_hat)}")

 auc: 0.5968641114982578 | mse: 0.40278401436910644


### Power Transformer
this one, idk what it even is but it sounds cool

In [10]:
from sklearn.preprocessing import PowerTransformer

transformer = PowerTransformer()
Xtr_transformed = transformer.fit_transform(Xtr)
Xva_transformed = transformer.fit_transform(Xva)

In [11]:
learner = mlpc(max_iter=1000)#no weights first.
learner.fit(Xtr_transformed,Ytr) #the training

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=1000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [12]:
Yva_hat = learner.predict(Xva_transformed)

print(f" auc: {roc_auc_score(Yva, Yva_hat)} | mse: {mean_squared_error(Yva, Yva_hat)}")

 auc: 0.56233183313976 | mse: 0.4373596766951055


### Robust Scaler

In [13]:
from sklearn.preprocessing import RobustScaler

scaler = RobustScaler()
Xtr_scaled = scaler.fit_transform(Xtr, Ytr)
Xva_scaled = scaler.fit_transform(Xva,Yva)

In [14]:
learner = mlpc(max_iter=1000)#no weights first.
learner.fit(Xtr_scaled,Ytr) #the training

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=1000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [15]:
Yva_hat = learner.predict(Xva_scaled)

print(f" auc: {roc_auc_score(Yva, Yva_hat)} | mse: {mean_squared_error(Yva, Yva_hat)}")

 auc: 0.5423211866047232 | mse: 0.4575662325999102


### Normalizer

In [16]:
from sklearn.preprocessing import Normalizer

transformer = Normalizer()
Xtr_transformed = transformer.fit_transform(Xtr)
Xva_transformed = transformer.fit_transform(Xva)

In [17]:
learner = mlpc(max_iter=1000)#no weights first.
learner.fit(Xtr_transformed,Ytr) #the training

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=1000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [18]:
Yva_hat = learner.predict(Xva_transformed)

print(f" auc: {roc_auc_score(Yva, Yva_hat)} | mse: {mean_squared_error(Yva, Yva_hat)}")

 auc: 0.5349589463156537 | mse: 0.4643017512348451


### Conlusion

Without specifying more parameters, it seems like the Quantile Transformer is the best to use for pre-processing!

# Quantile Transformer Parameter Testing

In [36]:
n_quantiles_testing = [500, 600, 700, 800, 900, 1000, 1100, 1200]

for index, num in enumerate(n_quantiles_testing):
    transformer = QuantileTransformer(n_quantiles=num)
    Xtr_transformed = transformer.fit_transform(Xtr)
    Xva_transformed = transformer.fit_transform(Xva)
    
    learner = mlpc(max_iter=1000)#no weights first.
    learner.fit(Xtr_transformed,Ytr) #the training
    
    Yva_hat = learner.predict(Xva_transformed)

    print(f"n= {num} auc: {roc_auc_score(Yva, Yva_hat)} | mse: {mean_squared_error(Yva, Yva_hat)}")

n= 500 auc: 0.6028669021809265 | mse: 0.3969465648854962
n= 600 auc: 0.5957135598141695 | mse: 0.4041311180960934
n= 700 auc: 0.5998257839721255 | mse: 0.40008980691513246
n= 800 auc: 0.6023313492063492 | mse: 0.3973955994611585
n= 900 auc: 0.5916799748354625 | mse: 0.4081724292770543
n= 1000 auc: 0.6027297877145439 | mse: 0.3973955994611585
n= 1100 auc: 0.5973157826816362 | mse: 0.40233497979344407
n= 1200 auc: 0.6050885598141695 | mse: 0.39470139200718457


In [37]:
# Increasing n-quantiles might be overfitting?
# Decreasing n-quantiles reduces AUC
n_quantiles_testing = [710, 720, 730, 740, 750, 760, 770, 780, 790]

for index, num in enumerate(n_quantiles_testing):
    transformer = QuantileTransformer(n_quantiles=num)
    Xtr_transformed = transformer.fit_transform(Xtr)
    Xva_transformed = transformer.fit_transform(Xva)
    
    learner = mlpc(max_iter=1000)#no weights first.
    learner.fit(Xtr_transformed,Ytr) #the training
    
    Yva_hat = learner.predict(Xva_transformed)

    print(f"n= {num} auc: {roc_auc_score(Yva, Yva_hat)} | mse: {mean_squared_error(Yva, Yva_hat)}")

n= 710 auc: 0.5878508517228029 | mse: 0.4122137404580153
n= 720 auc: 0.5884343947606142 | mse: 0.4113156713066906
n= 730 auc: 0.5992640179377985 | mse: 0.4005388414907948
n= 740 auc: 0.6066750548457865 | mse: 0.3929052537045353
n= 750 auc: 0.5897640018066849 | mse: 0.4095195330040413
n= 760 auc: 0.5902777777777778 | mse: 0.4095195330040413
n= 770 auc: 0.5942424022454511 | mse: 0.4059272563987427
n= 780 auc: 0.5970939798683702 | mse: 0.40323304894476875
n= 790 auc: 0.6027455155503936 | mse: 0.3973955994611585
