In [1]:
# univariate lstm example
from keras.models import Sequential
from keras.layers import LSTM, CuDNNLSTM, Dropout
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tsfresh.feature_extraction import feature_calculators
from tqdm import tqdm_notebook
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import time

Using TensorFlow backend.


## Generating features for a segment

In [2]:
# let's create a function to generate some statistical features based on the training data
def gen_features(X, part_1=True, part_2 = True, part_3 = False):
    X = pd.DataFrame(X)
    strain = []
    # part 1 
    if(part_1):
        zc = np.fft.fft(X)
        realFFT = np.real(zc)
        imagFFT = np.imag(zc)
        strain.append(realFFT.mean())
        strain.append(realFFT.std())
        strain.append(realFFT.max())
        strain.append(realFFT.min())
#         strain.append(imagFFT.mean())
#         strain.append(imagFFT.std())
#         strain.append(imagFFT.max())
#         strain.append(imagFFT.min())
    
    # part 2 
    if(part_2):
        strain.append(X.mean())
        strain.append(X.std())
        strain.append(X.max())
        strain.append(X.min())
        strain.append(X.kurtosis())
#         strain.append(feature_calculators.number_peaks(X, 10))
#         strain.append(feature_calculators.autocorrelation(X, 5))

    # part 3
    if(part_3):
        window_sizes = [100]
        for window in window_sizes:
            x_roll_std = X.rolling(window).std().dropna().values
            x_roll_mean = X.rolling(window).mean().dropna().values

            strain.append(x_roll_mean.max())
            strain.append(x_roll_mean.min())
            strain.append(np.quantile(x_roll_mean, 0.01)) #Als je niet weet wat dit doet vraag het aan Pepijn
            strain.append(np.quantile(x_roll_mean, 0.05))
            strain.append(np.quantile(x_roll_mean, 0.95))

            strain.append( np.quantile(x_roll_std, 0.01))
            strain.append(np.quantile(x_roll_std, 0.05))
            strain.append(np.quantile(x_roll_std, 0.95))
            strain.append(x_roll_std.min())
            strain.append(x_roll_mean.mean())      


    return pd.Series(strain)

## Generating features for every segment

In [3]:
def create_features_for_train(X, y, size_segment = 1000, n_features = 9 , n_segments = 4194):
    n_samples = int(150000/size_segment)
    
    X_sub = np.zeros((n_segments, n_samples, n_features))

    for index, segment in enumerate(tqdm_notebook(X)):
        sub_segments = np.split(X[index], n_samples)
        for i, sub_segment in enumerate(sub_segments):
            features = gen_features(sub_segment)
            for j, feature in enumerate(features):
                X_sub[index,i,j] = feature
    #         print(X_train_sub[index,i,:])

    y_sub =  np.zeros((n_segments))
    
    for i in range(len(y)):
        y_sub[i] = y[i][-1]
    return X_sub, y_sub

## Loading Train Data

In [4]:
train = pd.read_csv("../input/train.csv", dtype={'acoustic_data': np.int16, 'time_to_failure': np.float32})

# Display time_to_failure with more units of precision
pd.options.display.precision = 30

# Create numpy array from dataframe
train = train.as_matrix()

# Remove y values (time to failure)
y_train = train[:,1]

# Create a training file with simple derived features
rows = 150_000
segments = int(np.floor(train.shape[0] / rows))

X_train = train[:,0]
X_train = X_train[:-45480]
y_train = y_train[:-45480]

X_train = X_train.reshape((segments, rows))
y_train = y_train.reshape((segments, rows))

  import sys


## Create Train Features for LSTM

In [5]:
X_train_features, y_train_features = create_features_for_train(X_train,y_train, size_segment=1000, n_features=9, n_segments = 4194)

HBox(children=(IntProgress(value=0, max=4194), HTML(value='')))

## Create Standerdized Scaler

In [6]:
X_train_features_reshaped = np.reshape(X_train_features, (4194*150, 9))

scaler = StandardScaler()
scaler.fit(X_train_features_reshaped)

StandardScaler(copy=True, with_mean=True, with_std=True)

## Scale Train Data

In [7]:
X_train_scaled = []

for segment in X_train_features:
    X_train_scaled.append(scaler.transform(segment))

X_train_scaled = np.array(X_train_scaled)
print(X_train_scaled.shape)

(4194, 150, 9)


## Reshape Train Data for LSTM

In [8]:
# # reshape from [samples, timesteps] into [samples, timesteps, features]
# X_train_scaled = X.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], n_features))

In [9]:
def build_model(n_features, verbose=0):
    # The LSTM architecture
    model = Sequential()
    # First LSTM layer with Dropout regularisation
    model.add(CuDNNLSTM(units=50, return_sequences=True, input_shape=(None,n_features)))
    model.add(Dropout(0.3))
    # Second LSTM layer
    model.add(CuDNNLSTM(units=50, return_sequences=True))
    model.add(Dropout(0.3))
    # Third LSTM layer
    model.add(CuDNNLSTM(units=50, return_sequences=True))
    model.add(Dropout(0.3))
    # Fourth LSTM layer
    model.add(CuDNNLSTM(units=50))
    model.add(Dropout(0.3))
    # The output layer
    model.add(Dense(units=1))

    # Compiling the RNN
    if verbose:
        model.summary()
    return model


In [10]:
y_train_features.shape

(4194,)

In [11]:
n_fold = 8
folds = KFold(n_splits=n_fold, shuffle=True, random_state=11)

train_model(X_train_scaled, y_train_features, folds,n_features=9)

NameError: name 'train_model' is not defined

In [12]:
def train_model(X=X_train_scaled, y=y_train_features, folds=folds, n_features = 9):
    
    model = build_model(n_features)
    model.compile(optimizer='RMSprop',loss='mae')
    
    checkpoint = ModelCheckpoint('', monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    callbacks_list = [checkpoint]
    
    for fold_n, (train_index, valid_index) in enumerate(folds.split(X)):
        print('Fold', fold_n, 'started at', time.ctime())
        
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]
        
        earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min', min_delta = 0.005)
        mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
        reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, epsilon=1e-4, mode='min')

        model.fit(X_train, y_train, batch_size=64, epochs=1000, verbose=1, callbacks=[earlyStopping, mcp_save, reduce_lr_loss], validation_data=(X_valid, y_valid))
        

## Load Test Data

In [13]:
submission = pd.read_csv('../input/sample_submission.csv', index_col='seg_id')
X_test = pd.DataFrame(dtype=np.float64, index=submission.index)
size_segment = 1000
n_samples = int(150000/size_segment)
n_segments = len(X_test.index)
n_features = 9

X_test_features = np.zeros((n_segments, n_samples, n_features))
for index, seg_id in enumerate(tqdm_notebook(X_test.index)):
    segment = pd.read_csv('../input/test/' + seg_id + '.csv')
    sub_segments = np.split(segment, n_samples)
    for i, sub_segment in enumerate(sub_segments):
        features = gen_features(sub_segment)
        for j, feature in enumerate(features):
            X_test_features[index,i,j] = feature
    
    

HBox(children=(IntProgress(value=0, max=2624), HTML(value='')))




## Scaling Test Data 

In [14]:
X_test_scaled = []

for segment in X_test_features:
    X_test_scaled.append(scaler.transform(segment))

X_test_scaled = np.array(X_test_scaled)
print(X_test_scaled.shape)

(2624, 150, 9)


## Create submission

In [15]:
model = build_model(9)
model.load_weights('.mdl_wts.hdf5')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


OSError: Unable to open file (unable to open file: name = '.mdl_wts.hdf5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [16]:
prediction_lstm = model.predict(X_test_scaled, verbose=0)

InvalidArgumentError: No OpKernel was registered to support Op 'CudnnRNN' used by node cu_dnnlstm_1/CudnnRNN (defined at /opt/conda/lib/python3.6/site-packages/keras/layers/cudnn_recurrent.py:517) with these attrs: [seed=87654321, dropout=0, input_mode="linear_input", T=DT_FLOAT, direction="unidirectional", rnn_mode="lstm", seed2=0, is_training=true]
Registered devices: [CPU, XLA_CPU]
Registered kernels:
  <no registered kernels>

	 [[node cu_dnnlstm_1/CudnnRNN (defined at /opt/conda/lib/python3.6/site-packages/keras/layers/cudnn_recurrent.py:517) ]]

Caused by op 'cu_dnnlstm_1/CudnnRNN', defined at:
  File "/opt/conda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/opt/conda/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/opt/conda/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/opt/conda/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/opt/conda/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/opt/conda/lib/python3.6/asyncio/base_events.py", line 1434, in _run_once
    handle._run()
  File "/opt/conda/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/opt/conda/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/opt/conda/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-0eed16456060>", line 1, in <module>
    model = build_model(9)
  File "<ipython-input-9-d04dc96288dd>", line 5, in build_model
    model.add(CuDNNLSTM(units=50, return_sequences=True, input_shape=(None,n_features)))
  File "/opt/conda/lib/python3.6/site-packages/keras/engine/sequential.py", line 165, in add
    layer(x)
  File "/opt/conda/lib/python3.6/site-packages/keras/layers/recurrent.py", line 532, in __call__
    return super(RNN, self).__call__(inputs, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/keras/engine/base_layer.py", line 457, in __call__
    output = self.call(inputs, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/keras/layers/cudnn_recurrent.py", line 90, in call
    output, states = self._process_batch(inputs, initial_state)
  File "/opt/conda/lib/python3.6/site-packages/keras/layers/cudnn_recurrent.py", line 517, in _process_batch
    is_training=True)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 1636, in __call__
    input_data, input_h, input_c, params, is_training=is_training)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 1527, in __call__
    seed=self._seed)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 1014, in _cudnn_rnn
    outputs, output_h, output_c, _ = gen_cudnn_rnn_ops.cudnn_rnn(**args)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py", line 142, in cudnn_rnn
    seed2=seed2, is_training=is_training, name=name)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
    op_def=op_def)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): No OpKernel was registered to support Op 'CudnnRNN' used by node cu_dnnlstm_1/CudnnRNN (defined at /opt/conda/lib/python3.6/site-packages/keras/layers/cudnn_recurrent.py:517) with these attrs: [seed=87654321, dropout=0, input_mode="linear_input", T=DT_FLOAT, direction="unidirectional", rnn_mode="lstm", seed2=0, is_training=true]
Registered devices: [CPU, XLA_CPU]
Registered kernels:
  <no registered kernels>

	 [[node cu_dnnlstm_1/CudnnRNN (defined at /opt/conda/lib/python3.6/site-packages/keras/layers/cudnn_recurrent.py:517) ]]


In [17]:
# This can be used to create a download link for the submission file

from IPython.display import HTML
import base64

def create_download_link(df, title = "Download CSV file", filename = "data.csv"):  
    csv = df.to_csv()
    b64 = base64.b64encode(csv.encode())
    payload = b64.decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload,title=title,filename=filename)
    return HTML(html)

In [18]:
submission['time_to_failure'] = prediction_lstm
create_download_link(submission['time_to_failure'], filename = "LSTM_simple_features.csv")

NameError: name 'prediction_lstm' is not defined

In [19]:
submission

Unnamed: 0_level_0,time_to_failure
seg_id,Unnamed: 1_level_1
seg_00030f,0
seg_0012b5,0
seg_00184e,0
seg_003339,0
seg_0042cc,0
seg_004314,0
seg_004cd2,0
seg_004ee5,0
seg_004f1f,0
seg_00648a,0
