In [20]:
#borrowed from Preston and implemented as needed
from keras.models import Model
from keras.layers import Dense, Flatten, Activation, AlphaDropout, Input, Add, Concatenate
from keras.activations import selu
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam, RMSprop, Adamax, SGD, Nadam
from keras.losses import logcosh
from clr import CyclicLR
import keras.backend as K
import math

import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler

In [47]:
class SNN:
    '''
    Small SNN for medical data prediction.
    Usage:
        snn = SNN(input_shape)
        model = snn.create_model()
        model.train...
    '''

    def __init__(self, input_shape):
        self.input_shape = input_shape
        self.max_label = 0.0

    def create_model(self) -> Model:
        '''
        Creates model. Returns Keras Model object.
        Model follow simple encoder-decoder structure.
        Uses SELU to self-regularize without batch-normalization.
        Treats all values in batch as single, flattened array of values.        
        '''
        input_tensor = Input(self.input_shape)
        x = input_tensor
        starting_neurons = 2048
        starting_alpha = 0.025
        decay = 0.6
        alpha_decay = 0.6
        layers = 3

        # skip_conn = []
        # for i in range(layers):
        #     x = Dense(int(starting_neurons * math.pow(decay, i)))(x)
        #     x = self.selu_layer(x, int(
        #         starting_neurons * math.pow(decay, i)), starting_alpha * math.pow(alpha_decay, i))
        #     skip_conn.append(x)
        # latent = self.selu_layer(
        #     x, int(starting_neurons * math.pow(decay, layers - 1)))
        # x = latent
        # for i in range(layers):
        #     p = layers - i - 1
        #     x = Dense(int(starting_neurons * math.pow(decay, p)))(x)
        #     x = self.selu_layer(x, int(starting_neurons * math.pow(decay, p)))
        #     x = Add()([x, skip_conn.pop()])
        for i in range(layers):
            x = Dense(int(starting_neurons * math.pow(decay, i)), kernel_initializer="lecun_normal")(x)
            x = self.selu_layer(x,int(starting_neurons * math.pow(decay, i)), starting_alpha * math.pow(alpha_decay, i))
        x = Dense(25, kernel_initializer="lecun_normal")(x)
        latent = self.selu_layer(x, 25, 0.0)
        x = Activation(selu,name='latent')(latent) 
        # x = Dense(self.input_shape[0], kernel_initializer="lecun_normal")(x)
        # Reconstruction loss
        recon = Dense(
            self.input_shape[1], kernel_initializer="lecun_normal", name="r")(x)
        # Single unit linear layer for output
        x = Dense(16, kernel_initializer='lecun_normal', activation=selu)(latent)
        x = Dense(16, kernel_initializer='lecun_normal', activation=selu)(x)
        x = Dense(16, kernel_initializer='lecun_normal')(x)

        x = Dense(1, kernel_initializer='lecun_normal', name="x")(x)
        # x = Concatenate()([x, recon])

        model = Model(inputs=input_tensor, outputs=[x, recon])
        model.compile(loss='logcosh',
                      optimizer=RMSprop(lr=0), loss_weights=[.1, 100])#, metrics=[self.label, self.recon])
        return model

    def selu_layer(self, x, units, rate:float=0):
        a = Activation(selu)(x)
        a = Dense(units, kernel_initializer='lecun_normal')(x)


        a = Dense(units, kernel_initializer='lecun_normal')(a)
        # Heavy regularization due to small sample sizes
        a = AlphaDropout(rate)(a)
        a = Add()([a, x])


        return a

    def dynamic_recon_loss(self, y_true, y_pred):
        label_error = self.label(y_true, y_pred)
        recon_error = self.recon(y_true, y_pred)

        return label_error * recon_error

    def recon(self, y_true, y_pred):
        # Split stuff
        recon_true = y_true[:, 1:]
        recon_pred = y_pred[:, 1:]

        recon_error = logcosh(recon_true, recon_pred)

        return recon_error

    def label(self, y_true, y_pred):
        # Split stuff
        label_true = y_true[:, 0]
        label_pred = y_pred[:, 0]
        label_error = logcosh(label_true, label_pred)

        return label_error

    def earlystopping(self, patience=10):
        return EarlyStopping("recon", patience=patience, restore_best_weights=True)
    def clr(self):
        return CyclicLR(base_lr=7e-5, max_lr=6e-4, step_size=200, mode="triangular2")


In [75]:
input_shape = (300,1)
snn = SNN(input_shape)
model = snn.create_model()

In [73]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            (None, 1, 300)       0                                            
__________________________________________________________________________________________________
dense_106 (Dense)               (None, 1, 2048)      616448      input_8[0][0]                    
__________________________________________________________________________________________________
dense_107 (Dense)               (None, 1, 2048)      4196352     dense_106[0][0]                  
__________________________________________________________________________________________________
dense_108 (Dense)               (None, 1, 2048)      4196352     dense_107[0][0]                  
__________________________________________________________________________________________________
alpha_drop

In [59]:
# import data
train = pd.read_csv("/Users/JoonH/dont-overfit-ii/train.csv")
train_y = train['target']
train_X = train.drop(['id','target'], axis=1).values

test = pd.read_csv("/Users/JoonH/dont-overfit-ii/test.csv")
test = test.drop(['id'], axis=1).values

# scale using RobustScaler
# fitting scaler on full data outperforms fitting on test_X only (+0.006 kaggle score)
data = RobustScaler().fit_transform(np.concatenate((train_X, test), axis=0))
train_X = data[:250]
test = data[250:]

In [60]:
train_x = np.expand_dims(train_X,-1)

In [40]:
train_label = np.expand_dims(train_y,0)

In [68]:
train_x.shape

(250, 300, 1)

In [74]:
model.fit(x=train_x, y=train_x, validation_split = 0.35, epochs = 10, batch_size=25)

ValueError: Error when checking input: expected input_8 to have shape (1, 300) but got array with shape (300, 1)