In [None]:
import os
import time
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm import tqdm
import math
from sklearn.model_selection import train_test_split
from sklearn import metrics

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, GRU, Conv1D, Flatten, GlobalMaxPooling1D
from keras.layers import Bidirectional, GlobalMaxPool1D
from keras.models import Model, Sequential
from keras import initializers, regularizers, constraints, optimizers, layers

In [None]:
# Load the labelled training questions from the working directory and report (rows, columns).
train_df = pd.read_csv(
    "train.csv"
)
print("Train shape : ", train_df.shape)

In [None]:
## Hyperparameters shared by the tokenizer, the padding step and the embedding layers
embed_size = 300 # dimensionality of each word vector
max_features = 50000 # vocabulary cap: passed to Tokenizer(num_words=...) and used as the Embedding input dimension
maxlen = 100 # every question is padded/truncated to this many tokens




In [None]:
# Raw question text (missing entries replaced by the "_na_" placeholder) and the binary labels.
train_X = train_df["question_text"].fillna("_na_").values
train_Y = np.array(train_df["target"])

## Tokenize the sentences
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(list(train_X))
train_X = tokenizer.texts_to_sequences(train_X)

## split to train and val
train_size = 50000  # number of shuffled questions kept for training; the rest become validation data

# A dedicated, seeded generator makes the split reproducible across kernel restarts
# without touching NumPy's global random state.
split_rng = np.random.RandomState(2018)
my_indices = split_rng.permutation(len(train_X))
train_idx, val_idx = my_indices[:train_size], my_indices[train_size:]

# The token sequences are ragged Python lists, so they are gathered one by one;
# the labels are a NumPy array and can be fancy-indexed directly.
val_X = [train_X[i] for i in val_idx]
train_X = [train_X[i] for i in train_idx]

val_Y = train_Y[val_idx]
train_Y = train_Y[train_idx]

## Pad the sentences
train_X = pad_sequences(train_X, maxlen=maxlen)
val_X = pad_sequences(val_X, maxlen=maxlen)

# Cache the prepared arrays so later sessions can skip tokenization.
# np.save does not create missing parent directories, so make sure the folder exists.
os.makedirs('saved_arrays', exist_ok=True)
np.save('saved_arrays/subsampled_train_X', train_X)
np.save('saved_arrays/subsampled_train_Y', train_Y)
np.save('saved_arrays/subsampled_val_X', val_X)
np.save('saved_arrays/subsampled_val_Y', val_Y)

In [None]:
# Restore the cached, already padded train/validation arrays written by the preprocessing cell.
train_X, train_Y = (
    np.load('saved_arrays/subsampled_train_X.npy'),
    np.load('saved_arrays/subsampled_train_Y.npy'),
)
val_X, val_Y = (
    np.load('saved_arrays/subsampled_val_X.npy'),
    np.load('saved_arrays/subsampled_val_Y.npy'),
)

In [None]:
# First 50000 validation rows; this smaller subset is what the fit() calls below monitor.
val_X_reduced, val_Y_reduced = val_X[:50000], val_Y[:50000]

## WITHOUT Embeddings

## Dense ANN 

In [None]:
# Define model here
def model_compile():
    """Build and compile the dense baseline (no pretrained embedding weights are supplied).

    Layers: Embedding -> Dense(64, relu) -> global max pooling -> Dense(16, relu)
    -> Dropout(0.1) -> Dense(1, sigmoid).

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam optimizer,
        accuracy metric).
    """
    network = Sequential([
        Embedding(max_features, embed_size, input_length=maxlen),
        Dense(64, activation="relu"),
        GlobalMaxPool1D(),
        Dense(16, activation="relu"),
        Dropout(0.1),
        Dense(1, activation="sigmoid"),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


model = model_compile()
# Train for two epochs, monitoring the reduced validation subset.
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    validation_data=(val_X_reduced, val_Y_reduced),
    verbose=1,
)

# Score the full validation split (not just the reduced subset used during fit).
pred_noemb_val_y = model.predict(
    [val_X],
    batch_size=1024,
    verbose=1,
)


In [None]:

# Sweep the decision threshold from 0.10 to 0.50 in steps of 0.01 and report validation F1 for each.
for raw_thresh in np.arange(0.1, 0.501, 0.01):
    thresh = np.round(raw_thresh, 2)  # strip floating-point noise from the arange step
    binary_preds = (pred_noemb_val_y > thresh).astype(int)
    f1_at_thresh = metrics.f1_score(val_Y, binary_preds)
    print("F1 score at threshold {0} is {1}".format(thresh, f1_at_thresh))

In [None]:
from keras.models import load_model

# Write the trained model to an HDF5 file.
# The target directory is created first: saving fails if it does not exist.
os.makedirs('saved_models_no_embeddings', exist_ok=True)
model.save('saved_models_no_embeddings/dense_thresh_32.h5')
del model

# Round-trip check: reload the model that was just written.
model = load_model('saved_models_no_embeddings/dense_thresh_32.h5')

## CNN

In [None]:
# Define model here
def model_compile():
    """Build and compile the 1D-CNN classifier (no pretrained embedding weights are supplied).

    Layers: Embedding -> Conv1D(128 filters, kernel size 2, relu) -> global max
    pooling -> Dense(16, relu) -> Dropout(0.1) -> Dense(1, sigmoid).

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam optimizer,
        accuracy metric).
    """
    network = Sequential([
        Embedding(max_features, embed_size, input_length=maxlen),
        Conv1D(128, 2, activation='relu'),
        GlobalMaxPool1D(),
        Dense(16, activation="relu"),
        Dropout(0.1),
        Dense(1, activation="sigmoid"),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


model = model_compile()
# Train for two epochs, monitoring the reduced validation subset.
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    validation_data=(val_X_reduced, val_Y_reduced),
    verbose=1,
)



In [None]:
# Score the full validation split with the CNN trained above.
pred_noemb_val_y = model.predict(
    [val_X],
    batch_size=1024,
    verbose=1,
)


In [None]:

# Sweep the decision threshold from 0.01 to 0.50 in steps of 0.01 and report validation F1 for each.
for raw_thresh in np.arange(0.01, 0.501, 0.01):
    thresh = np.round(raw_thresh, 2)  # strip floating-point noise from the arange step
    binary_preds = (pred_noemb_val_y > thresh).astype(int)
    f1_at_thresh = metrics.f1_score(val_Y, binary_preds)
    print("F1 score at threshold {0} is {1}".format(thresh, f1_at_thresh))

In [None]:
from keras.models import load_model

# Write the trained model to an HDF5 file.
# The target directory is created first: saving fails if it does not exist.
os.makedirs('saved_models_no_embeddings', exist_ok=True)
model.save('saved_models_no_embeddings/CNN_thresh_29.h5')
del model

# Round-trip check: reload the model that was just written.
model = load_model('saved_models_no_embeddings/CNN_thresh_29.h5')

## LSTM

In [None]:
# Define model here
def model_compile():
    """Build and compile the bidirectional-LSTM classifier (no pretrained embedding weights are supplied).

    Layers: Embedding -> Bidirectional(LSTM(128, return_sequences=True)) -> global
    max pooling -> Dense(16, relu) -> Dropout(0.1) -> Dense(1, sigmoid).

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam optimizer,
        accuracy metric).
    """
    network = Sequential([
        Embedding(max_features, embed_size, input_length=maxlen),
        Bidirectional(LSTM(128, return_sequences=True)),
        GlobalMaxPool1D(),
        Dense(16, activation="relu"),
        Dropout(0.1),
        Dense(1, activation="sigmoid"),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


model = model_compile()
# Train for two epochs, monitoring the reduced validation subset.
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    validation_data=(val_X_reduced, val_Y_reduced),
    verbose=1,
)

In [None]:
# Score the full validation split with the LSTM trained above.
pred_noemb_val_y = model.predict(
    [val_X],
    batch_size=1024,
    verbose=1,
)

In [None]:
# Sweep the decision threshold from 0.01 to 0.50 in steps of 0.01 and report validation F1 for each.
for raw_thresh in np.arange(0.01, 0.501, 0.01):
    thresh = np.round(raw_thresh, 2)  # strip floating-point noise from the arange step
    binary_preds = (pred_noemb_val_y > thresh).astype(int)
    f1_at_thresh = metrics.f1_score(val_Y, binary_preds)
    print("F1 score at threshold {0} is {1}".format(thresh, f1_at_thresh))

In [None]:
from keras.models import load_model

# Write the trained model to an HDF5 file.
# The target directory is created first: saving fails if it does not exist.
os.makedirs('saved_models_no_embeddings', exist_ok=True)
model.save('saved_models_no_embeddings/LSTM_thresh_25.h5')
del model

# Round-trip check: reload the model that was just written.
model = load_model('saved_models_no_embeddings/LSTM_thresh_25.h5')

In [None]:
EMBEDDING_FILE = 'glove.840B.300d/glove.840B.300d.txt'


def get_coefs(word, *arr):
    """Turn one split embedding line into a ``(word, float32 vector)`` pair."""
    return word, np.asarray(arr, dtype='float32')


# Parse the text embedding file (one "word v1 v2 ..." entry per line).
# The context manager closes the file handle once the dict has been built.
with open(EMBEDDING_FILE, encoding='utf-8') as embedding_file:
    embeddings_index = dict(get_coefs(*o.split(" ")) for o in embedding_file)

# np.stack needs a real sequence; newer NumPy versions reject a dict view.
all_embs = np.stack(list(embeddings_index.values()))
emb_mean, emb_std = all_embs.mean(), all_embs.std()
embed_size = all_embs.shape[1]

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
# Rows for words missing from the embedding file keep a random draw that matches
# the mean and standard deviation of the pretrained vectors.
embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
for word, i in word_index.items():
    # Guard against nb_words (the real row count), not max_features: when the
    # vocabulary is smaller than max_features the largest index would overrun the matrix.
    if i >= nb_words:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# np.save does not create missing parent directories.
os.makedirs('saved_arrays', exist_ok=True)
np.save('saved_arrays/glove_embedding_matrix', embedding_matrix)


In [None]:
# Reload the cached GloVe-initialised embedding matrix instead of rebuilding it from the raw vectors.
embedding_matrix = np.load('saved_arrays/glove_embedding_matrix.npy')

In [None]:
from gensim.models import KeyedVectors

EMBEDDING_FILE = 'GoogleNews-vectors-negative300/GoogleNews-vectors-negative300.bin'


def get_coefs(word, *arr):
    """Turn one split embedding line into a ``(word, float32 vector)`` pair.

    Not used for this binary file; kept so the name stays defined as before.
    """
    return word, np.asarray(arr, dtype='float32')


# The GoogleNews file is binary word2vec, so it cannot be parsed line by line as text
# (and the built-in open() has no `binary` argument). gensim reads the format directly.
embeddings_index = KeyedVectors.load_word2vec_format(EMBEDDING_FILE, binary=True)

all_embs = embeddings_index.vectors
emb_mean, emb_std = all_embs.mean(), all_embs.std()
embed_size = all_embs.shape[1]

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
# Rows for words missing from the pretrained vocabulary keep a random draw that
# matches the mean and standard deviation of the pretrained vectors.
embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
for word, i in word_index.items():
    # Guard against nb_words (the real row count), not max_features: when the
    # vocabulary is smaller than max_features the largest index would overrun the matrix.
    if i >= nb_words:
        continue
    if word in embeddings_index:
        embedding_matrix[i] = embeddings_index[word]

# np.save does not create missing parent directories.
os.makedirs('saved_arrays', exist_ok=True)
np.save('saved_arrays/google_embedding_matrix', embedding_matrix)
# From here on `embedding_matrix` holds the word2vec-initialised weights (replacing any GloVe matrix loaded earlier).
embedding_matrix = np.load('saved_arrays/google_embedding_matrix.npy')

## With Embeddings 

## DENSE ANN

In [None]:
# Define model here
def model_compile():
    """Build and compile the dense classifier whose embedding starts from ``embedding_matrix``.

    Layers: Embedding (initial weights = ``embedding_matrix``) -> Dense(64, relu)
    -> global max pooling -> Dense(16, relu) -> Dropout(0.1) -> Dense(1, sigmoid).

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam optimizer,
        accuracy metric).
    """
    network = Sequential([
        Embedding(max_features, embed_size, weights=[embedding_matrix], input_length=maxlen),
        Dense(64, activation="relu"),
        GlobalMaxPool1D(),
        Dense(16, activation="relu"),
        Dropout(0.1),
        Dense(1, activation="sigmoid"),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


model = model_compile()
# Train for two epochs, monitoring the reduced validation subset.
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    validation_data=(val_X_reduced, val_Y_reduced),
    verbose=1,
)



In [None]:
# Score the full validation split. The variable keeps its "noemb" name from the earlier
# section because the threshold-sweep cell reads it, even though this model uses pretrained embeddings.
pred_noemb_val_y = model.predict(
    [val_X],
    batch_size=1024,
    verbose=1,
)


In [None]:
# Sweep the decision threshold from 0.01 to 0.50 in steps of 0.01 and report validation F1 for each.
for raw_thresh in np.arange(0.01, 0.501, 0.01):
    thresh = np.round(raw_thresh, 2)  # strip floating-point noise from the arange step
    binary_preds = (pred_noemb_val_y > thresh).astype(int)
    f1_at_thresh = metrics.f1_score(val_Y, binary_preds)
    print("F1 score at threshold {0} is {1}".format(thresh, f1_at_thresh))

In [None]:
from keras.models import load_model

# Write the trained model to an HDF5 file.
# The target directory is created first: saving fails if it does not exist.
os.makedirs('saved_models_with_embeddings', exist_ok=True)
model.save('saved_models_with_embeddings/Dense_thresh_23.h5')
del model

# Round-trip check: reload the model that was just written.
model = load_model('saved_models_with_embeddings/Dense_thresh_23.h5')

## CNN

In [None]:
# Define model here
def model_compile():
    """Build and compile the 1D-CNN classifier whose embedding starts from ``embedding_matrix``.

    Layers: Embedding (initial weights = ``embedding_matrix``) -> Conv1D(128 filters,
    kernel size 2, relu) -> global max pooling -> Dense(16, relu) -> Dropout(0.1)
    -> Dense(1, sigmoid).

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam optimizer,
        accuracy metric).
    """
    network = Sequential([
        Embedding(max_features, embed_size, weights=[embedding_matrix], input_length=maxlen),
        Conv1D(128, 2, activation='relu'),
        GlobalMaxPool1D(),
        Dense(16, activation="relu"),
        Dropout(0.1),
        Dense(1, activation="sigmoid"),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


model = model_compile()
# Train for two epochs, monitoring the reduced validation subset.
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    validation_data=(val_X_reduced, val_Y_reduced),
    verbose=1,
)



In [None]:
# Score the full validation split. The variable keeps its "noemb" name from the earlier
# section because the threshold-sweep cell reads it, even though this model uses pretrained embeddings.
pred_noemb_val_y = model.predict(
    [val_X],
    batch_size=1024,
    verbose=1,
)

In [None]:
# Sweep the decision threshold from 0.01 to 0.50 in steps of 0.01 and report validation F1 for each.
for raw_thresh in np.arange(0.01, 0.501, 0.01):
    thresh = np.round(raw_thresh, 2)  # strip floating-point noise from the arange step
    binary_preds = (pred_noemb_val_y > thresh).astype(int)
    f1_at_thresh = metrics.f1_score(val_Y, binary_preds)
    print("F1 score at threshold {0} is {1}".format(thresh, f1_at_thresh))

In [None]:
from keras.models import load_model

# Write the trained model to an HDF5 file.
# The target directory is created first: saving fails if it does not exist.
os.makedirs('saved_models_with_embeddings', exist_ok=True)
model.save('saved_models_with_embeddings/CNN_thresh_48.h5')
del model

# Round-trip check: reload the model that was just written.
model = load_model('saved_models_with_embeddings/CNN_thresh_48.h5')

## LSTM

In [None]:
# Define model here
def model_compile():
    """Build and compile the bidirectional-LSTM classifier whose embedding starts from ``embedding_matrix``.

    Layers: Embedding (initial weights = ``embedding_matrix``) ->
    Bidirectional(LSTM(128, return_sequences=True)) -> global max pooling ->
    Dense(16, relu) -> Dropout(0.1) -> Dense(1, sigmoid).

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam optimizer,
        accuracy metric).
    """
    network = Sequential([
        Embedding(max_features, embed_size, weights=[embedding_matrix], input_length=maxlen),
        Bidirectional(LSTM(128, return_sequences=True)),
        GlobalMaxPool1D(),
        Dense(16, activation="relu"),
        Dropout(0.1),
        Dense(1, activation="sigmoid"),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


model = model_compile()
# Train for two epochs, monitoring the reduced validation subset.
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    validation_data=(val_X_reduced, val_Y_reduced),
    verbose=1,
)

In [None]:
# Score the full validation split. The variable keeps its "noemb" name from the earlier
# section because the threshold-sweep cell reads it, even though this model uses pretrained embeddings.
pred_noemb_val_y = model.predict(
    [val_X],
    batch_size=1024,
    verbose=1,
)

In [None]:
# Sweep the decision threshold from 0.01 to 0.50 in steps of 0.01 and report validation F1 for each.
for raw_thresh in np.arange(0.01, 0.501, 0.01):
    thresh = np.round(raw_thresh, 2)  # strip floating-point noise from the arange step
    binary_preds = (pred_noemb_val_y > thresh).astype(int)
    f1_at_thresh = metrics.f1_score(val_Y, binary_preds)
    print("F1 score at threshold {0} is {1}".format(thresh, f1_at_thresh))

In [None]:
from keras.models import load_model

# Write the trained model to an HDF5 file.
# The target directory is created first: saving fails if it does not exist.
os.makedirs('saved_models_with_embeddings', exist_ok=True)
model.save('saved_models_with_embeddings/LSTM_thresh_25.h5')
del model

# Round-trip check: reload the model that was just written.
model = load_model('saved_models_with_embeddings/LSTM_thresh_25.h5')

In [None]:
from keras import backend as K
from keras.engine.topology import Layer


class Attention(Layer):
    """Attention pooling over the step axis of a 3D input.

    ``call`` scores every step with a learned vector ``W`` (plus an optional
    per-step bias ``b``), applies ``tanh`` then ``exp``, normalises the scores
    over the step axis and returns the score-weighted sum of the inputs, so a
    ``(batch, steps, features)`` input becomes ``(batch, features)``.

    Args:
        step_dim: Number of steps in the input sequence; used to reshape the
            per-step scores in ``call``.
        W_regularizer: Optional regularizer for the scoring vector ``W``.
        b_regularizer: Optional regularizer for the bias ``b``.
        W_constraint: Optional constraint for ``W``.
        b_constraint: Optional constraint for ``b``.
        bias: Whether to add the learned per-step bias to the scores.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0  # real value is set in build() once the input shape is known
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the scoring vector ``W`` and, if enabled, the per-step bias ``b``."""
        assert len(input_shape) == 3

        # `shape` is passed by keyword: a positional shape only works through
        # Keras's legacy add_weight compatibility shim.
        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        """Return no mask: the step axis is collapsed by this layer."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over the step axis."""
        features_dim = self.features_dim
        step_dim = self.step_dim

        # Flatten to (batch * steps, features), project onto W, then fold back
        # into one score per step: (batch, steps).
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # Zero the weights of masked steps before normalising.
        if mask is not None:
            a *= K.cast(mask, K.floatx())

        # Epsilon keeps the division finite when every step is masked out.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Return ``(batch, features)``."""
        return input_shape[0],  self.features_dim

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild this layer.

        All ``__init__`` arguments are included (not only ``step_dim``) so that
        regularizers, constraints and the bias switch survive a save/load round trip.
        """
        config = super(Attention, self).get_config()
        config.update({
            'step_dim': self.step_dim,
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
        })
        return config



## Adding Attention

## DENSE NN

In [None]:
# Define model here
def model_compile():
    """Build and compile the dense classifier that pools with the Attention layer.

    Layers: Embedding (initial weights = ``embedding_matrix``) -> Dense(64, relu)
    -> Attention(maxlen) -> Dense(16, relu) -> Dropout(0.1) -> Dense(1, sigmoid).

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam optimizer,
        accuracy metric).
    """
    network = Sequential([
        Embedding(max_features, embed_size, weights=[embedding_matrix], input_length=maxlen),
        Dense(64, activation="relu"),
        Attention(maxlen),
        Dense(16, activation="relu"),
        Dropout(0.1),
        Dense(1, activation="sigmoid"),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


model = model_compile()
# Train for two epochs, monitoring the reduced validation subset.
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    validation_data=(val_X_reduced, val_Y_reduced),
    verbose=1,
)


In [None]:
# Score the full validation split. The variable keeps its "noemb" name from the earlier
# section because the threshold-sweep cell reads it, even though this model uses pretrained embeddings.
pred_noemb_val_y = model.predict(
    [val_X],
    batch_size=1024,
    verbose=1,
)

In [None]:
# Sweep the decision threshold from 0.01 to 0.50 in steps of 0.01 and report validation F1 for each.
for raw_thresh in np.arange(0.01, 0.501, 0.01):
    thresh = np.round(raw_thresh, 2)  # strip floating-point noise from the arange step
    binary_preds = (pred_noemb_val_y > thresh).astype(int)
    f1_at_thresh = metrics.f1_score(val_Y, binary_preds)
    print("F1 score at threshold {0} is {1}".format(thresh, f1_at_thresh))

In [None]:
from keras.models import load_model

# Write the trained model to an HDF5 file.
# The target directory is created first: saving fails if it does not exist.
os.makedirs('saved_models_with_attention_embed', exist_ok=True)
model.save('saved_models_with_attention_embed/Dense_thresh_26.h5')
del model

# The model is reloaded in the next cell, which passes the custom Attention
# class to load_model through custom_objects.


In [None]:
# Reload the saved model; the custom Attention layer is passed by name through custom_objects.
model = load_model(
    'saved_models_with_attention_embed/Dense_thresh_26.h5',
    custom_objects={'Attention': Attention},
)

## LSTM with attention

In [None]:
# Define model here
def model_compile():
    """Build and compile the bidirectional-LSTM classifier that pools with the Attention layer.

    Layers: Embedding (initial weights = ``embedding_matrix``) ->
    Bidirectional(LSTM(128, return_sequences=True)) -> Attention(maxlen) ->
    Dense(16, relu) -> Dropout(0.1) -> Dense(1, sigmoid).

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam optimizer,
        accuracy metric).
    """
    network = Sequential([
        Embedding(max_features, embed_size, weights=[embedding_matrix], input_length=maxlen),
        Bidirectional(LSTM(128, return_sequences=True)),
        Attention(maxlen),
        Dense(16, activation="relu"),
        Dropout(0.1),
        Dense(1, activation="sigmoid"),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


model = model_compile()
# Train for two epochs, monitoring the reduced validation subset.
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    validation_data=(val_X_reduced, val_Y_reduced),
    verbose=1,
)

In [None]:
# Score the full validation split. The variable keeps its "noemb" name from the earlier
# section because the threshold-sweep cell reads it, even though this model uses pretrained embeddings.
pred_noemb_val_y = model.predict(
    [val_X],
    batch_size=1024,
    verbose=1,
)

In [None]:
# Sweep the decision threshold from 0.01 to 0.50 in steps of 0.01 and report validation F1 for each.
for raw_thresh in np.arange(0.01, 0.501, 0.01):
    thresh = np.round(raw_thresh, 2)  # strip floating-point noise from the arange step
    binary_preds = (pred_noemb_val_y > thresh).astype(int)
    f1_at_thresh = metrics.f1_score(val_Y, binary_preds)
    print("F1 score at threshold {0} is {1}".format(thresh, f1_at_thresh))

In [None]:
from keras.models import load_model

# Write the trained model to an HDF5 file.
# The target directory is created first: saving fails if it does not exist.
os.makedirs('saved_models_with_attention_embed', exist_ok=True)
model.save('saved_models_with_attention_embed/LSTM_thresh_23.h5')
del model

# The model is reloaded in the next cell, which passes the custom Attention
# class to load_model through custom_objects.


In [None]:
# Reload the saved model; the custom Attention layer is passed by name through custom_objects.
model = load_model(
    'saved_models_with_attention_embed/LSTM_thresh_23.h5',
    custom_objects={'Attention': Attention},
)

In [None]:
# Print the layer-by-layer summary of the model bound to `model` (the reloaded LSTM-with-attention model when cells run in order).
model.summary()

## CNN Loss Plot

In [None]:
# # Define model here     
# Imported here because this cell is the first to use Callback; without it a fresh
# kernel raises NameError when the class statement runs.
from keras.callbacks import Callback


class F1Evaluation(Callback):
    """Keras callback that records validation F1 every ``interval`` batches.

    Args:
        validation_data: ``(X_val, y_val)`` pair used for scoring.
        interval: Score whenever the batch index is a multiple of this value.
        threshold: Predictions strictly above this value count as the positive class.

    Attributes:
        valf1: F1 scores in the order they were computed; reset when training begins.
    """

    def __init__(self, validation_data=(), interval=10, threshold=0.48):
        # Initialise the Callback base class itself; super(Callback, self) would skip it.
        super(F1Evaluation, self).__init__()

        self.interval = interval
        self.threshold = threshold
        self.X_val, self.y_val = validation_data
        self.valf1 = []

    def on_train_begin(self, logs=None):
        """Start every training run with an empty score history."""
        self.valf1 = []

    def on_batch_end(self, epoch, logs=None):
        """Score the validation data after every ``interval``-th batch.

        The first argument keeps its original name ``epoch`` for interface
        compatibility, but in this hook it is the batch index.
        """
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            y_pred = (y_pred > self.threshold).astype(int)
            score = metrics.f1_score(self.y_val, y_pred)
            print(score)
            self.valf1.append(score)


# Same CNN as in the pretrained-embedding section, rebuilt here so the F1 callback can be attached.
model = Sequential([
    Embedding(max_features, embed_size, weights=[embedding_matrix], input_length=maxlen),
    Conv1D(128, 2, activation='relu'),
    GlobalMaxPool1D(),
    Dense(16, activation="relu"),
    Dropout(0.1),
    Dense(1, activation="sigmoid"),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# The callback scores the reduced validation subset every 10 batches.
history = F1Evaluation(
    validation_data=(val_X_reduced, val_Y_reduced),
    interval=10,
)
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    validation_data=(val_X_reduced, val_Y_reduced),
    verbose=1,
    callbacks=[history],
)


In [None]:
# Keep the CNN run's validation F1 history under its own name before `history` is rebound by a later run.
CNN_valf1 = history.valf1


In [None]:
# Display the recorded CNN validation F1 scores.
CNN_valf1

In [None]:
from keras.utils import plot_model

# Render the CNN architecture diagram, with tensor shapes and layer names, to a PNG file.
plot_model(
    model,
    to_file='CNN_arch.png',
    show_shapes=True,
    show_layer_names=True,
)

In [None]:
# Print the layer-by-layer summary of the model bound to `model` (the CNN built above when cells run in order).
model.summary()

In [None]:
# Reset the Keras backend session before the next model is built.
from keras.backend import clear_session 
clear_session()

## Dense Model

In [None]:
# # Define model here
from keras.callbacks import Callback
class F1Evaluation(Callback):
    """Keras callback that records validation F1 every ``interval`` batches.

    Args:
        validation_data: ``(X_val, y_val)`` pair used for scoring.
        interval: Score whenever the batch index is a multiple of this value.
        threshold: Predictions strictly above this value count as the positive class.

    Attributes:
        valf1: F1 scores in the order they were computed; reset when training begins.
    """

    def __init__(self, validation_data=(), interval=10, threshold=0.23):
        # Initialise the Callback base class itself; super(Callback, self) would skip it.
        super(F1Evaluation, self).__init__()

        self.interval = interval
        self.threshold = threshold
        self.X_val, self.y_val = validation_data
        self.valf1 = []

    def on_train_begin(self, logs=None):
        """Start every training run with an empty score history."""
        self.valf1 = []

    def on_batch_end(self, epoch, logs=None):
        """Score the validation data after every ``interval``-th batch.

        The first argument keeps its original name ``epoch`` for interface
        compatibility, but in this hook it is the batch index.
        """
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            y_pred = (y_pred > self.threshold).astype(int)
            score = metrics.f1_score(self.y_val, y_pred)
            print(score)
            self.valf1.append(score)

            
# Same dense network as in the pretrained-embedding section, rebuilt here so the F1 callback can be attached.
model = Sequential([
    Embedding(max_features, embed_size, weights=[embedding_matrix], input_length=maxlen),
    Dense(64, activation="relu"),
    GlobalMaxPool1D(),
    Dense(16, activation="relu"),
    Dropout(0.1),
    Dense(1, activation="sigmoid"),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# The callback scores the reduced validation subset every 10 batches.
history = F1Evaluation(
    validation_data=(val_X_reduced, val_Y_reduced),
    interval=10,
)

# Unlike the other runs, fit() itself receives no validation_data; only the callback evaluates.
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    verbose=1,
    callbacks=[history],
)


In [None]:
# Keep the dense run's validation F1 history under its own name before `history` is rebound by a later run.
Dense_val_f1 = history.valf1

In [None]:
# Display the recorded dense-model validation F1 scores.
Dense_val_f1

In [None]:
from keras.backend import clear_session
from keras.utils import plot_model

# Render the dense architecture diagram, with tensor shapes and layer names, to a PNG file.
plot_model(
    model,
    to_file='Dense_arch.png',
    show_shapes=True,
    show_layer_names=True,
)
# Reset the Keras backend session before the next model is built.
clear_session()

## LSTM

In [None]:
# # Define model here
        
from keras.callbacks import Callback


class LossHistory(Callback):
    """Keras callback that records training loss per batch and validation loss per epoch.

    Defined here because no cell in the notebook defines it, so ``LossHistory()``
    would otherwise raise NameError on a fresh kernel.

    Attributes:
        losses: Training loss reported at the end of each batch.
        val_loss: ``val_loss`` reported at the end of each epoch (``None`` when
            fit() was given no validation data).
    """

    def on_train_begin(self, logs=None):
        """Start every training run with empty histories."""
        self.losses = []
        self.val_loss = []

    def on_batch_end(self, batch, logs=None):
        """Append the loss of the batch that just finished."""
        self.losses.append((logs or {}).get('loss'))

    def on_epoch_end(self, epoch, logs=None):
        """Append the validation loss of the epoch that just finished."""
        self.val_loss.append((logs or {}).get('val_loss'))


# Bidirectional LSTM with pretrained embedding weights, rebuilt so the loss callback can be attached.
model = Sequential()
model.add(Embedding(max_features, embed_size, weights=[embedding_matrix], input_length = maxlen ))
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(GlobalMaxPool1D())
model.add(Dense(16, activation="relu"))
model.add(Dropout(0.1))
model.add(Dense(1, activation="sigmoid"))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = LossHistory()
model.fit(train_X, train_Y, batch_size=256, epochs=2, validation_data=(val_X_reduced, val_Y_reduced), verbose = 1, callbacks=[history])

In [None]:
# Training losses recorded by the `history` callback during the LSTM run.
LSTM_loss = history.losses
# NOTE(review): this binds the callback object itself, not a list of scores — confirm whether an attribute of `history` was intended.
LSTM_val_f1 = history

In [None]:
from keras.backend import clear_session
from keras.utils import plot_model

# Render the LSTM architecture diagram, with tensor shapes and layer names, to a PNG file.
plot_model(
    model,
    to_file='LSTM_arch.png',
    show_shapes=True,
    show_layer_names=True,
)
# Reset the Keras backend session before the next model is built.
clear_session()

## LSTM with attention

In [None]:
# # Define model here
    
        
# Bidirectional LSTM with the Attention pooling layer, rebuilt so the loss callback can be attached.
model = Sequential([
    Embedding(max_features, embed_size, weights=[embedding_matrix], input_length=maxlen),
    Bidirectional(LSTM(128, return_sequences=True)),
    Attention(maxlen),
    Dense(16, activation="relu"),
    Dropout(0.1),
    Dense(1, activation="sigmoid"),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# NOTE(review): LossHistory is not defined in this cell; it must already exist in the kernel.
history = LossHistory()
model.fit(
    train_X,
    train_Y,
    batch_size=256,
    epochs=2,
    validation_data=(val_X_reduced, val_Y_reduced),
    verbose=1,
    callbacks=[history],
)

In [None]:
# Training and validation losses recorded by the `history` callback during the LSTM-with-attention run.
LSTM_wa_loss = history.losses
LSTM_wa_val_loss = history.val_loss

In [None]:
from keras.backend import clear_session
from keras.utils import plot_model

# Render the LSTM-with-attention architecture diagram, with tensor shapes and layer names, to a PNG file.
plot_model(
    model,
    to_file='LSTM_wa_arch.png',
    show_shapes=True,
    show_layer_names=True,
)
# Reset the Keras backend session now that the last model has been plotted.
clear_session()

In [None]:
# Display the validation losses recorded for the LSTM-with-attention run.
LSTM_wa_val_loss

In [None]:
import pylab as pl
# Global matplotlib styling for the figures below, applied in one update.
# Note that 'text.usetex' makes matplotlib render text through LaTeX.
pl.rcParams.update({
    # figure, lines and fonts
    'figure.figsize': (10, 7.5),
    'lines.linewidth': 1.5,
    'font.family': 'serif',
    'font.weight': 'bold',
    'font.size': 20,
    'font.sans-serif': 'serif',
    'text.usetex': True,
    'axes.linewidth': 1.5,
    'axes.titlesize': 'medium',
    'axes.labelsize': 'medium',
    # x-axis ticks
    'xtick.major.size': 8,
    'xtick.minor.size': 4,
    'xtick.major.pad': 8,
    'xtick.minor.pad': 8,
    'xtick.color': 'k',
    'xtick.labelsize': 'medium',
    'xtick.direction': 'in',
    # y-axis ticks
    'ytick.major.size': 8,
    'ytick.minor.size': 4,
    'ytick.major.pad': 8,
    'ytick.minor.pad': 8,
    'ytick.color': 'k',
    'ytick.labelsize': 'medium',
    'ytick.direction': 'in',
})

In [None]:
# scipy.interpolate.spline has been removed from SciPy; make_interp_spline with
# k=3 is the cubic-spline replacement used here.
from scipy.interpolate import make_interp_spline

# NOTE(review): CNN_loss and Dense_loss are not assigned in any visible cell (the CNN and
# dense runs above only record F1 scores) — they must exist in the kernel before this runs.

# Batch indices of the recorded losses; all four series are assumed to have this length.
x = np.array(range(len(LSTM_wa_loss)))

xnew = np.linspace(x.min(), x.max(), 100)  # 100 evenly spaced points on which the smoothed curves are evaluated


def _smooth_losses(losses):
    """Evaluate a cubic interpolating spline through ``losses`` on the ``xnew`` grid."""
    return make_interp_spline(x, losses, k=3)(xnew)


power_smooth_LSTM_wa_loss = _smooth_losses(LSTM_wa_loss)
power_smooth_LSTM_loss = _smooth_losses(LSTM_loss)
power_smooth_CNN_loss = _smooth_losses(CNN_loss)
power_smooth_Dense_loss = _smooth_losses(Dense_loss)


pl.plot(xnew, power_smooth_LSTM_wa_loss, '-', label = 'LSTM with attention layer')
pl.plot(xnew, power_smooth_LSTM_loss, '-', label = 'LSTM')
pl.plot(xnew, power_smooth_CNN_loss, '-', label = 'CNN')
pl.plot(xnew, power_smooth_Dense_loss, '-', label = 'Feed Foward Network')
pl.legend()
pl.xlabel('Batches')
pl.ylabel('Binary Cross Entropy Loss')
pl.title('Training Loss Progress with incoming batches')
pl.savefig('TrainingLoss.png')
pl.show()
pl.clf()

In [None]:
# NOTE(review): Dense_val_loss is not assigned in any visible cell (the dense run stores Dense_val_f1) — confirm the intended name.
Dense_val_loss