In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from keras import Model
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.callbacks import LearningRateScheduler
from keras.layers import Embedding, ELU, Dropout, Flatten, Input, Dense, BatchNormalization, Conv1D, MaxPooling1D, LSTM, CuDNNLSTM
from keras.layers import concatenate, multiply, Bidirectional
from keras.layers import TimeDistributed, Reshape, RepeatVector, Lambda, Activation
from keras.models import Sequential
from keras.optimizers import Adam
from keras.regularizers import l2, l1_l2
from keras.utils.np_utils import to_categorical

# The feature files carry no header row: name the 4096 columns 0..4095
# explicitly and parse every value as float.
csv_options = dict(header=None, dtype='float', names=list(range(4096)))
data = pd.read_csv('train.csv', **csv_options)
test = pd.read_csv('test.csv', **csv_options)

# Training targets: the 'category' column, reshaped to a single-column 2-D array.
label_frame = pd.read_csv('train_label.csv')
label = label_frame['category'].values.reshape(-1, 1)

In [None]:
# Hyperparameters for the network built below.
maxlen = 4096          # input sequence length passed to the Embedding layer
embed_size = 64        # width of each embedding vector
lstm_layer_size = 100  # units per LSTM direction
num_layers = 1         # requested number of stacked recurrent blocks

# One recurrent block is always added, plus one more for every layer beyond
# the first; written as a single count so the block is defined only once.
recurrent_blocks = max(num_layers - 1, 0) + 1

model = Sequential()

# Embedding over a 256-entry vocabulary with L2-regularised weights.
model.add(Embedding(256, embed_size, input_length=maxlen, embeddings_regularizer=l2(1e-4)))
model.add(Dropout(0.5))

# Each block is a bidirectional CuDNN LSTM that returns the full sequence,
# followed by dropout.
for _ in range(recurrent_blocks):
    model.add(
        Bidirectional(
            CuDNNLSTM(
                lstm_layer_size,
                kernel_regularizer=l2(1e-5),
                recurrent_regularizer=l2(1e-5),
                return_sequences=True,
            )
        )
    )
    model.add(Dropout(0.5))

# Flatten the per-timestep outputs and reduce them to one sigmoid unit.
model.add(Flatten())
model.add(Dense(1, activation='sigmoid', name='loss_out'))

def scheduler(epoch):
    """Halve the optimizer's learning rate at every epoch except epoch 0.

    Reads and writes ``model.optimizer.lr`` through the Keras backend ``K``,
    so the module-level ``model`` must already have an optimizer attached.

    Args:
        epoch: Index of the epoch about to start.

    Returns:
        The learning rate stored on the optimizer after any update.
    """
    # `epoch % 1` is always 0 for integer epochs, so in practice only the
    # epoch-0 check decides whether the rate is left untouched.
    if epoch == 0 or epoch % 1 != 0:
        return K.get_value(model.optimizer.lr)

    halved = K.get_value(model.optimizer.lr) * .5
    K.set_value(model.optimizer.lr, halved)
    print("lr changed to {}".format(halved))
    return K.get_value(model.optimizer.lr)

# Adam starting at a learning rate of 0.004, with gradient-norm clipping at 1.0.
optimizer = Adam(lr=0.004, clipnorm=1.0)

# Wrap `scheduler` as a Keras callback so it can be passed to `fit`.
lr_decay = LearningRateScheduler(scheduler)

# Binary classification: cross-entropy loss, accuracy reported as 'binary_accuracy'.
model.compile(
    optimizer,
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)
model.summary()

In [None]:
# Early stopping watches 'binary_accuracy' (the metric without the 'val_'
# prefix) and, with patience=0, does not tolerate any non-improving epoch.
es = EarlyStopping(
    monitor='binary_accuracy',
    min_delta=0,
    patience=0,
    verbose=0,
    mode='auto',
)

# Train for up to 10 epochs in batches of 200, holding out 10% of the
# training rows for validation.
history = model.fit(
    data,
    label,
    batch_size=200,
    epochs=10,
    verbose=1,
    callbacks=[es, lr_decay],
    validation_split=0.1,
    shuffle=True,
    class_weight=None,
    sample_weight=None,
    initial_epoch=0,
    steps_per_epoch=None,
    validation_steps=None,
)

print(history.history.keys())

# One figure per recorded curve: (history key, figure title, y-axis label).
# The model is compiled with metrics=['binary_accuracy'], so the accuracy
# curves are stored under 'binary_accuracy' / 'val_binary_accuracy'; the
# 'acc' / 'val_acc' keys are never written for this model and raise KeyError.
curves = (
    ('val_binary_accuracy', 'model validation accuracy', 'validation accuracy'),
    ('binary_accuracy', 'model accuracy', 'accuracy'),
    ('val_loss', 'model validation loss', 'validation loss'),
    ('loss', 'model loss', 'loss'),
)

for key, title, ylabel in curves:
    plt.plot(history.history[key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.show()

In [None]:
# Score the test set with the trained model.
result = model.predict(test, batch_size=None, verbose=1)

# Pair each prediction with its row index. The original stacked an undefined
# name `y` here (NameError); the values to export are the predictions in
# `result`, flattened to one value per row.
sample_ids = np.arange(result.shape[0])
submission = np.column_stack([sample_ids, result.reshape(-1)])

# `fmt` spells out both columns, so the id is written as an integer and the
# prediction as a float; comments="" keeps the header line free of a '# ' prefix.
np.savetxt("prediction_rnn.csv", submission, delimiter=',', fmt='%i,%f', header="sample_id,malware", comments="")