In [95]:
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import metrics, Input
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [96]:
# Metrics tracked during training in addition to the loss; the `name` values
# are the labels that appear in the Keras progress output.
METRICS = [
    metrics.RootMeanSquaredError(name='rms'),
    metrics.MeanAbsoluteError(name='mae'),
]

In [97]:
# Hyperparameters read by make_and_train_autoencoder.
EPOCHS = 50        # value passed as `epochs` to fit()
BATCH_SIZE = 64    # value passed as `batch_size` to fit()
ENCODING_DIM = 16  # width of the bottleneck layer, i.e. the desired reduced dimension

In [98]:
def make_and_train_autoencoder(X_train, metrics=METRICS, encoding_dim=None,
                               batch_size=None, epochs=None):
    """Build a dense autoencoder, fit it on ``X_train`` and return it with its encoder.

    The network is ``input -> Dense(2*d, relu) -> Dense(d, relu) ->
    Dense(2*d, relu) -> Dense(n_features, linear)`` where ``d`` is the
    bottleneck width. It is compiled with the Adam optimizer and a
    mean-squared-error loss, and trained to reconstruct its own input.

    Parameters
    ----------
    X_train : object exposing ``.shape`` that Keras ``fit`` accepts
        Training samples. The size of the last axis is used as the number of
        input and output units.
    metrics : metric, list/tuple of metrics, or None, optional
        Metrics handed to ``compile``. A single metric is wrapped in a list;
        a list/tuple is passed as a flat list. Defaults to ``METRICS``.
        (This parameter name shadows the imported ``metrics`` module inside
        the function; it is kept for backward compatibility.)
    encoding_dim : int, optional
        Bottleneck width. Falls back to the notebook-level ``ENCODING_DIM``.
    batch_size : int, optional
        Batch size for ``fit``. Falls back to the notebook-level ``BATCH_SIZE``.
    epochs : int, optional
        Number of epochs for ``fit``. Falls back to the notebook-level ``EPOCHS``.

    Returns
    -------
    tuple
        ``(autoencoder, encoder)``: the full trained model and a model that
        maps the same input to the bottleneck activations. Both share layers,
        so the encoder uses the trained weights.
    """
    # Resolve hyperparameters at call time (not at definition time) so that
    # editing the notebook constants and re-running only the calling cell
    # still takes effect, exactly as it did when the globals were read inline.
    if encoding_dim is None:
        encoding_dim = ENCODING_DIM
    if batch_size is None:
        batch_size = BATCH_SIZE
    if epochs is None:
        epochs = EPOCHS

    # Pass metrics as a flat list. The previous `[metrics]` turned the default
    # list into a nested list ([[rms, mae]]), which Keras treats as the
    # per-output form rather than the plain form for a single-output model.
    if metrics is None:
        metric_list = None
    elif isinstance(metrics, (list, tuple)):
        metric_list = list(metrics)
    else:
        metric_list = [metrics]

    len_input_output = X_train.shape[-1]
    input_ = Input(shape=(len_input_output,))
    encoded = Dense(units=encoding_dim * 2, activation="relu")(input_)
    bottleneck = Dense(units=encoding_dim, activation="relu")(encoded)
    decoded = Dense(units=encoding_dim * 2, activation="relu")(bottleneck)
    output = Dense(units=len_input_output, activation="linear")(decoded)

    # Training is performed on the entire autoencoder (input is also the target).
    autoencoder = Model(inputs=input_, outputs=output)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error',
                        metrics=metric_list)
    autoencoder.fit(X_train, X_train,
                    batch_size=batch_size,
                    epochs=epochs)

    # Use only the encoder part for dimensionality reduction.
    encoder = Model(inputs=input_, outputs=bottleneck)
    return autoencoder, encoder

In [99]:
# Load the 'training_ROS.csv' table that the following cells split and encode.
df = pd.read_csv('training_ROS.csv')
# NOTE(review): `qw` is not referenced by any other visible cell, and a later
# cell reads 'validation.csv' rather than 'validation_set.csv' — confirm which
# file is intended and whether this load is still needed.
qw = pd.read_csv('validation_set.csv')

In [100]:
# Every column except the last is a feature; the last column is the label.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [101]:
# Fit the autoencoder on the feature columns. `auto` is the full model and
# `encode` is the sub-model that stops at the bottleneck layer.
auto, encode = make_and_train_autoencoder(X, metrics=METRICS)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [102]:
# Run the features through the encoder to obtain the bottleneck activations.
q = encode.predict(X)

In [103]:
# Sanity check: the second dimension should equal ENCODING_DIM.
q.shape

(32710, 16)

In [104]:
# Wrap the encoded features in a DataFrame with default integer row and
# column labels. Note that this rebinds `df`, replacing the table loaded earlier.
q = np.array(q)
df = pd.DataFrame(q)

In [105]:
# Re-attach the label column next to the encoded features.
# NOTE(review): `y` is a pandas Series, so this assignment aligns on index
# labels; assumes `y` and `df` carry the same default index — TODO confirm.
df['label'] = y
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,label
0,0.0,0.176377,0.297052,0.0,0.168536,0.0,0.091217,0.177422,0.192657,0.114414,0.042181,0.0,0.303161,0.158141,0.0,0.164332,0.0
1,0.0,0.145195,0.334893,0.0,0.130104,0.0,0.108132,0.276668,0.220707,0.228691,0.136393,0.0,0.342791,0.180877,0.0,0.196175,1.0
2,0.0,0.315034,0.246673,0.0,0.349820,0.0,0.284155,0.224775,0.130678,0.121905,0.173306,0.0,0.310383,0.088846,0.0,0.401038,0.0
3,0.0,0.155673,0.372546,0.0,0.313895,0.0,0.250187,0.228466,0.118142,0.129902,0.078225,0.0,0.400816,0.140651,0.0,0.108406,0.0
4,0.0,0.178996,0.244286,0.0,0.225383,0.0,0.261496,0.113891,0.078897,0.074989,0.069801,0.0,0.212734,0.120407,0.0,0.058746,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32705,0.0,0.200497,0.121754,0.0,0.119313,0.0,0.280635,0.120347,0.068865,0.063566,0.045507,0.0,0.094394,0.161756,0.0,0.071938,1.0
32706,0.0,0.354684,0.316169,0.0,0.300437,0.0,0.122386,0.171823,0.259745,0.170999,0.209344,0.0,0.518709,0.172038,0.0,0.485416,1.0
32707,0.0,0.331742,0.354997,0.0,0.313935,0.0,0.091947,0.291601,0.205047,0.139764,0.199323,0.0,0.289754,0.222683,0.0,0.496981,1.0
32708,0.0,0.324100,0.248483,0.0,0.334480,0.0,0.270242,0.115537,0.002554,0.023827,0.143548,0.0,0.096559,0.086054,0.0,0.354157,1.0


In [106]:
# Persist the encoded features plus label; index=False leaves the row index out of the CSV.
df.to_csv('training_ros_autoencoded_4.csv', index=False)

In [107]:
#### CNN autoencode
# Same pipeline as the ROS cells, applied to 'training_CNN.csv': load, split
# features/label, train a new autoencoder, encode, attach the label, save.
# Every name below (df, X, y, auto, encode, q, w) is rebound at notebook scope.
df = pd.read_csv('training_CNN.csv')
X, y = df.iloc[:,:-1], df.iloc[:, -1]
auto, encode = make_and_train_autoencoder(X, metrics=METRICS)
q = encode.predict(X)
w = np.array(q)
# Rebinds `df` from the raw table to the encoded features.
df = pd.DataFrame(data=w,index=None,columns=None)
df['label'] = y
df.to_csv('training_cnn_autoencoded_4.csv', index=False)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [108]:
#### RUS autoencode
# Same pipeline as the ROS cells, applied to 'training_RUS.csv': load, split
# features/label, train a new autoencoder, encode, attach the label, save.
# Every name below (df, X, y, auto, encode, q, w) is rebound at notebook scope.
df = pd.read_csv('training_RUS.csv')
X, y = df.iloc[:,:-1], df.iloc[:, -1]
auto, encode = make_and_train_autoencoder(X, metrics=METRICS)
q = encode.predict(X)
w = np.array(q)
# Rebinds `df` from the raw table to the encoded features.
df = pd.DataFrame(data=w,index=None,columns=None)
df['label'] = y
df.to_csv('training_rus_autoencoded_4.csv', index=False)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [109]:
#### Validation autoencode
# Same pipeline as the cells above, applied to 'validation.csv'.
# NOTE(review): the input is 'validation.csv' but the output file is named
# 'training_smote_autoencoded_4.csv' — confirm which of the two is intended.
# NOTE(review): this trains a new autoencoder on this file instead of reusing
# an encoder fitted on a training set, so the encoded columns are presumably
# not comparable with the training encodings — confirm this is intended.
# The saved output shows this cell was interrupted (KeyboardInterrupt) during epoch 1.
df = pd.read_csv('validation.csv')
X, y = df.iloc[:,:-1], df.iloc[:, -1]
auto, encode = make_and_train_autoencoder(X, metrics=METRICS)
q = encode.predict(X)
w = np.array(q)
# Rebinds `df` from the raw table to the encoded features.
df = pd.DataFrame(data=w,index=None,columns=None)
df['label'] = y
df.to_csv('training_smote_autoencoded_4.csv', index=False)

Epoch 1/50
 133/8178 [..............................] - ETA: 42s - loss: 0.0223 - rms: 0.0360 - mae: 0.0094

KeyboardInterrupt: 