In [1]:
import os
# Optional: make CUDA number the devices in PCI bus order (left disabled by the author).
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
# Expose only the GPUs with CUDA indices 2 and 3 to this process. This is set
# before the TensorFlow import below; the two cards are then listed as
# /device:GPU:0 and /device:GPU:1 in this cell's output.
os.environ["CUDA_VISIBLE_DEVICES"]="2,3"

from tensorflow.python.client import device_lib
# Print every device TensorFlow can see, to confirm the GPU selection.
print (device_lib.list_local_devices())

  from ._conv import register_converters as _register_converters


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 9320283792285231798
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 10654105600
locality {
  bus_id: 1
}
incarnation: 13703534341667828033
physical_device_desc: "device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:65:00.0, compute capability: 6.1"
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 10965090304
locality {
  bus_id: 1
}
incarnation: 7615065375862468699
physical_device_desc: "device: 1, name: GeForce GTX 1080 Ti, pci bus id: 0000:b3:00.0, compute capability: 6.1"
]


In [2]:
from __future__ import print_function
import numpy as np
# Seed NumPy's global RNG.
# NOTE(review): nothing visible in this notebook seeds TensorFlow's own RNG, so
# training results may still vary between runs -- confirm if exact
# reproducibility is required.
np.random.seed(1337)  # for reproducibility


import gzip
import sys
if (sys.version_info > (3, 0)):
    import pickle as pkl
else: #Python 2.7 imports
    import cPickle as pkl

import keras
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Activation, Flatten, concatenate
from keras.layers import Embedding
from keras.layers import Convolution1D, MaxPooling1D, GlobalMaxPooling1D
from keras.regularizers import Regularizer
from keras.preprocessing import sequence

Using TensorFlow backend.


In [3]:
def wordIdxLookup(word, word_idx_map):
    """Return the index stored for `word` in `word_idx_map`, or None when the word is absent."""
    return word_idx_map[word] if word in word_idx_map else None

In [4]:
# Load the pre-processed dataset from a gzip-compressed pickle.
# The `with` block closes the gzip handle as soon as loading finishes instead of
# leaving it open for the lifetime of the kernel.
# NOTE(review): pickle.load can execute arbitrary code -- only open files from a trusted source.
# NOTE(review): the absolute path is machine-specific; consider a configurable data directory.
with gzip.open("/home/dl1/Arav/Neuralnets/Session 2 - Sentence CNN/code/pkl/data.pkl.gz","rb") as data_file:
    data = pkl.load(data_file)
print("data loaded!")

data loaded!


In [5]:
# Pull the three dataset splits out of the loaded dictionary; each split
# carries its sentences together with the matching labels.
train_split, dev_split, test_split = data['train'], data['dev'], data['test']

train_sentences, train_labels = train_split['sentences'], train_split['labels']
dev_sentences, dev_labels = dev_split['sentences'], dev_split['labels']
test_sentences, test_labels = test_split['sentences'], test_split['labels']

# Word-embedding data stored alongside the splits.
word_embeddings = data['wordEmbeddings']

In [6]:
# :: Determine the length of the longest sentence across all three splits ::
all_sentences = train_sentences + dev_sentences + test_sentences
# The leading 0 keeps the result at 0 when there are no sentences at all.
max_sentence_len = max([0] + [len(tokens) for tokens in all_sentences])

print("Longest sentence: %d" % max_sentence_len)

Longest sentence: 59


In [7]:
# Labels become NumPy arrays, one per split.
y_train, y_dev, y_test = [
    np.array(split_labels)
    for split_labels in (train_labels, dev_labels, test_labels)
]

# Pad every split to the same width so each sentence is a fixed-length row.
X_train, X_dev, X_test = [
    sequence.pad_sequences(split_sentences, maxlen=max_sentence_len)
    for split_sentences in (train_sentences, dev_sentences, test_sentences)
]


print('X_train shape:', X_train.shape)
print('X_dev shape:', X_dev.shape)
print('X_test shape:', X_test.shape)

X_train shape: (5330, 59)
X_dev shape: (2664, 59)
X_test shape: (2668, 59)


In [8]:
# Inspect the training label array.
y_train

array([0, 0, 0, ..., 1, 1, 1])

In [9]:
#  :: Create the network ::

print('Build model...')

# Training hyper-parameters
batch_size = 50            # mini-batch size handed to fit() and predict()
nb_epoch = 10              # number of iterations of the training loop

# Architecture hyper-parameters
filter_lengths = [1,2,3]   # one convolution branch per kernel width
nb_filter = 50             # filters in each convolution branch
hidden_dims = 100          # units in the dense hidden layer

# Integer word-index input: max_sentence_len indices per sentence.
words_input = Input(shape=(max_sentence_len,), dtype='int32', name='words_input')

Build model...


In [10]:
# Check the symbolic shape of the input tensor.
words_input.shape

TensorShape([Dimension(None), Dimension(59)])

In [11]:
# Frozen embedding lookup initialised from the loaded embedding matrix.
vocab_size, embedding_dim = word_embeddings.shape[:2]
wordsEmbeddingLayer = Embedding(
    vocab_size,
    embedding_dim,
    weights=[word_embeddings],
    trainable=False    # keep the supplied vectors fixed during training
)

# Map every word index of the input to its embedding vector.
words = wordsEmbeddingLayer(words_input)


In [12]:
# One convolution + global-max-pooling branch per configured filter length.
def _conv_pool_branch(kernel_size):
    """Convolve the embedded words with `kernel_size`-wide filters, then max-pool over the sequence."""
    feature_maps = Convolution1D(
        filters=nb_filter,
        kernel_size=kernel_size,
        padding='same',
        activation='relu',
        strides=1
    )(words)
    return GlobalMaxPooling1D()(feature_maps)


words_convolutions = [_conv_pool_branch(width) for width in filter_lengths]

# Join the pooled features of all branches into a single vector.
merged = concatenate(words_convolutions)

# Vanilla hidden layer, with dropout applied before and after it.
hidden = Dropout(0.5)(merged)
hidden = Dense(
    hidden_dims,
    activation='tanh',
    kernel_regularizer=keras.regularizers.l2(0.01)
)(hidden)
hidden = Dropout(0.25)(hidden)

# Single sigmoid unit: the network emits one value in (0, 1) per sentence.
output = Dense(
    1,
    activation='sigmoid',
    kernel_regularizer=keras.regularizers.l2(0.01)
)(hidden)

model = Model(inputs=[words_input], outputs=[output])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
words_input (InputLayer)        (None, 59)           0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 59, 300)      4966200     words_input[0][0]                
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 59, 50)       15050       embedding_1[0][0]                
__________________________________________________________________________________________________
conv1d_2 (Conv1D)               (None, 59, 50)       30050       embedding_1[0][0]                
__________________________________________________________________________________________________
conv1d_3 (

In [13]:
# Train one epoch at a time so dev/test metrics can be reported after each pass.
for epoch_number in range(1, nb_epoch + 1):
    print("\n------------- Epoch %d ------------" % epoch_number)
    model.fit(X_train, y_train, batch_size=batch_size, epochs=1)

    # evaluate() returns [loss, accuracy], matching the metrics given to compile().
    dev_loss, dev_accuracy = model.evaluate(X_dev, y_dev, verbose=0)
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

    print("Val-Accuracy: %.2f%% (loss: %.4f)" % (dev_accuracy*100, dev_loss))
    print("Test-Accuracy: %.2f%% (loss: %.4f)" % (test_accuracy*100, test_loss))


------------- Epoch 1 ------------
Epoch 1/1
Val-Accuracy: 71.51% (loss: 1.1190)
Test-Accuracy: 70.50% (loss: 1.1249)

------------- Epoch 2 ------------
Epoch 1/1
Val-Accuracy: 76.80% (loss: 0.7631)
Test-Accuracy: 75.19% (loss: 0.7756)

------------- Epoch 3 ------------
Epoch 1/1
Val-Accuracy: 77.03% (loss: 0.6307)
Test-Accuracy: 76.01% (loss: 0.6463)

------------- Epoch 4 ------------
Epoch 1/1
Val-Accuracy: 78.64% (loss: 0.5527)
Test-Accuracy: 76.80% (loss: 0.5714)

------------- Epoch 5 ------------
Epoch 1/1
Val-Accuracy: 78.23% (loss: 0.5311)
Test-Accuracy: 77.02% (loss: 0.5440)

------------- Epoch 6 ------------
Epoch 1/1
Val-Accuracy: 76.80% (loss: 0.5570)
Test-Accuracy: 76.01% (loss: 0.5772)

------------- Epoch 7 ------------
Epoch 1/1
Val-Accuracy: 78.30% (loss: 0.5421)
Test-Accuracy: 76.99% (loss: 0.5628)

------------- Epoch 8 ------------
Epoch 1/1
Val-Accuracy: 78.64% (loss: 0.5263)
Test-Accuracy: 78.30% (loss: 0.5379)

------------- Epoch 9 ------------
Epoch 1/1
Va

In [14]:
# Run the trained model over the whole test set (one sigmoid output per sentence).
preds = model.predict(X_test, batch_size=batch_size)

In [16]:
# Model output for the first test sentence.
preds[0]

array([0.04080284], dtype=float32)

In [43]:
# Gold label of the first test sentence, for comparison with preds[0].
# NOTE(review): this cell's execution count (43) is out of sequence with its
# neighbours; re-run the notebook top to bottom before trusting the displayed value.
y_test[0]

0

In [17]:
# Load the extra sample to classify from a gzip-compressed pickle.
# The `with` block closes the gzip handle as soon as loading finishes instead of
# leaving it open for the lifetime of the kernel.
# NOTE(review): the file name contains a backtick ("resultdata`.pkl.gz"). The recorded
# output shows the load succeeded, so the path is kept as-is -- confirm whether
# the file on disk should be renamed.
# NOTE(review): pickle.load can execute arbitrary code -- only open files from a trusted source.
with gzip.open("/home/dl1/Arav/Neuralnets/Session 2 - Sentence CNN/code/pkl/resultdata`.pkl.gz","rb") as result_file:
    result_data = pkl.load(result_file)
print("data loaded!")

data loaded!


In [18]:
# Show the loaded sample as a plain Python list of word indices.
result_data.tolist()

[[2, 1694, 4694, 7, 136, 11, 1786, 259, 113, 1508, 308, 1, 1]]

In [19]:
# Pad the sample to max_sentence_len with the same pad_sequences call that was
# used for the train/dev/test splits, so it matches the model's input width.
result_X = sequence.pad_sequences(result_data, maxlen=max_sentence_len)

In [20]:
# Inspect the padded sample (the zeros are inserted on the left).
result_X

array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    2, 1694, 4694,    7,  136,   11, 1786,  259,  113,
        1508,  308,    1,    1]], dtype=int32)

In [21]:
# Score the padded sample with the trained model.
result_y_pred = model.predict(result_X, batch_size=batch_size)

In [22]:
# Sigmoid output for the sample; values near 1 correspond to label 1.
result_y_pred

array([[0.9560446]], dtype=float32)