In [28]:
#Importing the Libraries
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model

In [29]:
# Synthetic classification problem: 1000 samples with 100 features, of which
# 10 are informative and 90 are redundant; seeded so the data is repeatable.
X, y = make_classification(
    n_samples=1000,
    n_features=100,
    n_informative=10,
    n_redundant=90,
    random_state=1,
)
# report the shapes of the feature matrix and the label vector
print(X.shape, y.shape)

(1000, 100) (1000,)


In [30]:
# hold out 33% of the samples for testing (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1
)
# min-max scaling: the scaler is fitted on the training split only and the
# same fitted scaler is then applied to both splits
t = MinMaxScaler().fit(X_train)
X_train, X_test = t.transform(X_train), t.transform(X_test)
# show the shape of every split, one per line
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(670, 100)
(330, 100)
(670,)
(330,)


In [31]:
# AutoEncoder Model Preparation


def _dense_bn_leaky(units, tensor):
    """Apply Dense(units) -> BatchNormalization -> LeakyReLU to `tensor`."""
    tensor = Dense(units)(tensor)
    tensor = BatchNormalization()(tensor)
    return LeakyReLU()(tensor)


# the network is sized from the number of feature columns
n_inputs = X.shape[1]
input_data_shape = Input(shape=(n_inputs,))
# encoder: widen to 2x the input width, then come back to the input width
encoder = _dense_bn_leaky(n_inputs * 2, input_data_shape)
encoder = _dense_bn_leaky(n_inputs, encoder)
# bottleneck: a plain Dense layer with half as many units as there are inputs
n_bottleneck = round(n_inputs / 2)
bottleneck = Dense(n_bottleneck)(encoder)
# decoder: mirror image of the encoder, starting from the bottleneck
decoder = _dense_bn_leaky(n_inputs, bottleneck)
decoder = _dense_bn_leaky(n_inputs * 2, decoder)

In [32]:
# linear reconstruction head: maps the decoder output back to n_inputs values
output = Dense(units=n_inputs, activation='linear')(decoder)
# the autoencoder runs from the input tensor to its reconstruction
model = Model(inputs=input_data_shape, outputs=output)
# Adam optimizer with mean-squared-error loss
model.compile(loss='mse', optimizer='adam')

In [33]:
# print the layer-by-layer summary of the compiled autoencoder
model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 100)]             0         
_________________________________________________________________
dense_12 (Dense)             (None, 200)               20200     
_________________________________________________________________
batch_normalization_8 (Batch (None, 200)               800       
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU)    (None, 200)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 100)               20100     
_________________________________________________________________
batch_normalization_9 (Batch (None, 100)               400       
_________________________________________________________________
leaky_re_lu_9 (LeakyReLU)    (None, 100)               0   

In [34]:
# train the autoencoder to reproduce its own input (inputs double as targets);
# the scaled test split is passed as validation data, also reconstructing itself
history = model.fit(
    X_train,
    X_train,
    validation_data=(X_test, X_test),
    epochs=50,
    batch_size=16,
    verbose=2,
)

Train on 670 samples, validate on 330 samples
Epoch 1/50
670/670 - 2s - loss: 0.2409 - val_loss: 0.1894
Epoch 2/50
670/670 - 0s - loss: 0.0400 - val_loss: 0.1192
Epoch 3/50
670/670 - 0s - loss: 0.0235 - val_loss: 0.0640
Epoch 4/50
670/670 - 0s - loss: 0.0194 - val_loss: 0.0343
Epoch 5/50
670/670 - 0s - loss: 0.0160 - val_loss: 0.0192
Epoch 6/50
670/670 - 0s - loss: 0.0141 - val_loss: 0.0136
Epoch 7/50
670/670 - 0s - loss: 0.0133 - val_loss: 0.0122
Epoch 8/50
670/670 - 0s - loss: 0.0122 - val_loss: 0.0090
Epoch 9/50
670/670 - 0s - loss: 0.0116 - val_loss: 0.0075
Epoch 10/50
670/670 - 0s - loss: 0.0110 - val_loss: 0.0072
Epoch 11/50
670/670 - 0s - loss: 0.0111 - val_loss: 0.0085
Epoch 12/50
670/670 - 0s - loss: 0.0107 - val_loss: 0.0056
Epoch 13/50
670/670 - 0s - loss: 0.0099 - val_loss: 0.0058
Epoch 14/50
670/670 - 0s - loss: 0.0096 - val_loss: 0.0052
Epoch 15/50
670/670 - 0s - loss: 0.0092 - val_loss: 0.0048
Epoch 16/50
670/670 - 0s - loss: 0.0083 - val_loss: 0.0050
Epoch 17/50
670/670

In [35]:
# the encoder half on its own: same input tensor, output taken at the bottleneck
# (the decoder layers are not part of this model)
encoder = Model(
    inputs=input_data_shape,
    outputs=bottleneck,
)
# persist the encoder to disk
encoder.save('encoder.h5')

In [36]:
# Baseline: logistic regression on the scaled, un-encoded features. Its accuracy
# is the reference point for the encoder-compressed features evaluated next.
#
# X_train / X_test were already min-max scaled right after the train/test
# split, so they are used as-is. Fitting a second scaler on already-scaled data
# would only overwrite the shared arrays and the fitted scaler `t`.

# define model; max_iter matches the classifier trained on the encoded
# features so both get the same optimisation budget and the comparison is fair
model = LogisticRegression(max_iter=1000)
# fit model on training set
model.fit(X_train, y_train)
# make prediction on test set
yhat = model.predict(X_test)
# calculate accuracy: fraction of test labels predicted correctly
acc = accuracy_score(y_test, yhat)
print(acc)

0.8939393939393939


In [37]:
# Compress the inputs with the saved encoder, then fit logistic regression on
# the compressed features.

# load the encoder from file; it was saved without ever being compiled and is
# only used for inference here, so skip compilation on load
encoder = load_model('encoder.h5', compile=False)

# encode the train data
X_train_encode = encoder.predict(X_train)
# encode the test data
X_test_encode = encoder.predict(X_test)
# define the model
model = LogisticRegression(max_iter=1000)
# fit the model on the encoded training set
model.fit(X_train_encode, y_train)
# make predictions on the encoded test set
yhat = model.predict(X_test_encode)
# calculate classification accuracy
acc = accuracy_score(y_test, yhat)
print(acc)

0.9272727272727272
