In [1]:
import os
import pandas as pd
import numpy as np
from datetime import datetime
from models import Autoencoder, EncoderStack
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from tensorflow.keras.utils import to_categorical

# Load and check data

In [2]:
FILE_NAME = "GEO_data_batch_corr_final"

N_LAYERS = 3
N_NODES = [2000, 1000, 500]
DROPOUT = [0.1]
BATCH_SIZE = 3
EPOCHS = 10
TEST_RATIO = 0.15

assert len(N_NODES) == N_LAYERS or len(N_NODES) == 1
assert len(DROPOUT) == N_LAYERS or len(DROPOUT) == 1

In [3]:
dataframe = pd.read_pickle('data/pd/'+FILE_NAME)
print("Loaded {} samples with {} features.".format(dataframe.shape[0], dataframe.shape[1]))
data = dataframe.values
data = normalize(data)

classes = np.random.randint(0,1, dataframe.shape[0])
classes = to_categorical(classes)

x_train, x_test, y_train, y_test = train_test_split(data, classes, test_size=TEST_RATIO)
print("Keeping {} of samples for training: {} training, {} testing.".format(TEST_RATIO, x_train.shape[0], x_test.shape[0]))

Loaded 954 samples with 6785 features.
Keeping 0.15 of samples for training: 810 training, 144 testing.


# Train encoder layers

In [4]:
encoder_models = []

x_train_out, x_test_out = x_train, x_test
for idx, num_hidden in enumerate(N_NODES):
    print("\n##################################################################")
    print("Training layer {} with {} hidden nodes..\n".format(idx, num_hidden))
    encoder = Autoencoder(x_train_out.shape[1], num_hidden, "output/")
    recon_mse = encoder.fit(x_train_out, x_test_out, batch_size=BATCH_SIZE, num_epochs=EPOCHS)
    x_train_out = encoder.encoder_model.predict(x_train_out)
    x_test_out = encoder.encoder_model.predict(x_test_out)
    print("\nTraining losss: ", recon_mse[0])
    print("\nTesting loss: ", recon_mse[1])
    encoder_models.append(encoder)


##################################################################
Training layer 0 with 2000 hidden nodes..

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: output/20200727-035350/autoencoder-2000/assets

Training losss:  7.0468855e-06

Testing loss:  7.0703354e-06

##################################################################
Training layer 1 with 1000 hidden nodes..

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
INFO:tensorflow:Assets written to: output/20200727-040256/autoencoder-1000/assets

Training losss:  6.4089695e-06

Testing loss:  6.4348396e-06

##################################################################
Training layer 2 with 500 hidden nodes..

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
INFO:tensorflow:Assets written to: output/20200727-040457/autoencoder-500/asset

# Train encoder stack

In [26]:
model = EncoderStack(encoder_models, 'output/')

print("\n##################################################################")
print("Training layer {} with {} hidden nodes..\n".format(idx, num_hidden))
loss_train, loss_test = model.fit(x_train, y_train, x_test, y_test, batch_size=BATCH_SIZE, num_epochs=EPOCHS)

print("\nTraining losss: ", loss_train)
print("\nTesting loss: ", loss_test)


##################################################################
Training layer 1 with 50 hidden nodes..

Epoch 1/3
Epoch 2/3
Epoch 3/3
INFO:tensorflow:Assets written to: output/20200727-031923/encoder_stack/assets

Training losss:  0.023681095

Testing loss:  0.023681173
