[View in Colaboratory](https://colab.research.google.com/github/yylonly/ServeNet/blob/master/4_ServeNet_2D_CNN_1_BI_LTSM(512)_FC_(Glove200b_trainedEmbeddingLayer).ipynb)

## CNN

In [1]:
# Reproducibility setup: fix the seed of every random number generator this
# notebook touches (NumPy, Python's `random`, TensorFlow) and bind Keras to a
# single-threaded TensorFlow session. `seed` is reused later for the layer
# initializers and dropout layers.
seed = 12345

import numpy as np
import tensorflow as tf
import random as rn

# The below is necessary in Python 3.2.3 onwards to
# have reproducible behavior for certain hash-based operations.
# See these references for further details:
# https://docs.python.org/3.4/using/cmdline.html#envvar-PYTHONHASHSEED
# https://github.com/keras-team/keras/issues/2280#issuecomment-306959926

import os
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so assigning
# it here presumably only affects child processes, not this running kernel --
# confirm whether the kernel itself must be launched with the variable set.
os.environ['PYTHONHASHSEED'] = '0'

# The below is necessary for starting Numpy generated random numbers
# in a well-defined initial state.

np.random.seed(seed)

# The below is necessary for starting core Python generated random numbers
# in a well-defined state.

rn.seed(seed)

# Force TensorFlow to use single thread.
# Multiple threads are a potential source of
# non-reproducible results.
# For further details, see: https://stackoverflow.com/questions/42022950/which-seeds-have-to-be-set-where-to-realize-100-reproducibility-of-training-res

session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)

from keras import backend as K

# The below tf.set_random_seed() will make random number generation
# in the TensorFlow backend have a well-defined initial state.
# For further details, see: https://www.tensorflow.org/api_docs/python/tf/set_random_seed

tf.set_random_seed(seed)

# Create the session on the default graph with the single-threaded config and
# register it as the session Keras will use.
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

# Rest of code follows ...

Using TensorFlow backend.


In [12]:
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import h5py
import pandas as pd

#from sklearn.model_selection import train_test_split

from keras.models import Model
from keras.models import load_model
from keras import metrics
from keras.layers import Dense, Input, Dropout, LSTM, Activation, Conv2D, Reshape, Average, Flatten
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
from keras.preprocessing import sequence
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.initializers import glorot_uniform
from keras.callbacks import ModelCheckpoint
from keras.layers.wrappers import Bidirectional
from keras.initializers import Orthogonal

%matplotlib inline

## 1. Load data

In [3]:
# Load the pre-split, padded and index-encoded dataset.
# The context manager guarantees the HDF5 file is closed even if one of the
# dataset keys is missing. The trailing [:] reads each dataset fully into an
# in-memory array, so the arrays stay usable after the file is closed.
with h5py.File('../Data/SplittedPaddedIndexedServiceDataset.h5', 'r') as h5f:
    X_train = h5f['indexed_padded_Train_X'][:]
    Y_train = h5f['Train_Y_one_hot'][:]
    X_test = h5f['indexed_padded_Test_X'][:]
    Y_test = h5f['Test_Y_one_hot'][:]

# Sanity check: (samples, sequence length) for X and (samples, classes) for Y.
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

(8123, 110) (8123, 50)
(2061, 110) (2061, 50)


In [4]:
# Inspect the first training sample: one row of integer word indices
# (the trailing zeros in the output are the padding).
X_train[0]

array([357266, 146233, 306318,  68100, 268046, 340949, 226138, 192973,
       269953, 268046,   8172, 304244, 306318,  68159, 188481, 357266,
       146211, 350362, 357266, 306262, 126057, 268046, 357266,  68100,
       225650, 360915, 293378, 297112, 133215, 306262,  54718, 108280,
       360915, 133215, 287783, 125166, 188481, 348215, 268046, 358160,
       163265, 193716, 174032, 111449,  57488, 357212, 220870, 122453,
        45107, 357266, 117493, 343876, 269798, 193919, 384515, 333113,
       357266, 153371,  57459, 220930, 374205, 297544, 357266, 146233,
       306318, 133215, 117493, 153371,  54718,  58800, 146233, 306318,
       133215, 117493,  51203, 117505, 360915, 306966, 339034, 117493,
       357266, 306162, 117493,  90548,  71090, 114153,  45217, 360915,
       117493, 338227, 305005,  93724, 325550,  54718, 272583, 291376,
       357266,  57459, 374208, 306616,  89943,  54718, 306550,  58997,
       151766, 188481, 392023,      0,      0,      0], dtype=int32)

In [5]:
# Inspect the first training label: a one-hot vector over the service categories.
Y_train[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

## 2. ServeNet - Embedding Layer

In [6]:
def read_glove_vecs(glove_file):
    """
    Parse a GloVe text file into vocabulary look-up tables.

    Each non-empty line is expected to be ``<word> <v1> <v2> ...`` separated by
    whitespace. Blank lines are skipped instead of raising IndexError.

    Arguments:
    glove_file -- path to the UTF-8 encoded GloVe text file

    Returns:
    words_to_index -- dict mapping each word to its index; indices start at 1
                      and follow the sorted order of the words
    index_to_words -- dict with the inverse mapping (index -> word)
    word_to_vec_map -- dict mapping each word to its vector (np.float64 array);
                       if a word occurs more than once, the last vector wins
    """
    word_to_vec_map = {}
    with open(glove_file, 'r', encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            word_to_vec_map[fields[0]] = np.array(fields[1:], dtype=np.float64)

    # Index 0 is never assigned to a word; numbering starts at 1.
    words_to_index = {
        word: index
        for index, word in enumerate(sorted(word_to_vec_map), start=1)
    }
    index_to_words = {index: word for word, index in words_to_index.items()}
    return words_to_index, index_to_words, word_to_vec_map

In [7]:
word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('../Data/glove.6B.200d.txt')

In [8]:
# Embedding layer initialised from the GloVe vectors loaded by read_glove_vecs().

def trainable_embedding_layer(trainable=False):
    """
    Create a Keras Embedding layer whose weights are the pre-trained GloVe vectors.

    The function reads the module-level `word_to_index` and `word_to_vec_map`
    dictionaries; they are not passed as arguments.

    Arguments:
    trainable -- whether the embedding weights are updated during training.
                 Defaults to False (frozen embeddings), which is what this
                 function has always produced despite its name.

    Returns:
    embedding_layer -- a built Keras Embedding layer of size (vocab_len, emb_dim)
                       with its weights set to the GloVe embedding matrix
    """
    # +1 because word indices start at 1; row 0 is left as an all-zero vector.
    vocab_len = len(word_to_index) + 1
    # Take the dimensionality from any stored vector rather than relying on a
    # specific word being present in the vocabulary.
    emb_dim = next(iter(word_to_vec_map.values())).shape[0]

    # Row `index` of the matrix holds the vector of the word with that index.
    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        embedding_vector = word_to_vec_map.get(word)
        if embedding_vector is not None:
            emb_matrix[index, :] = embedding_vector

    embedding_layer = Embedding(vocab_len, emb_dim, trainable=trainable)

    # The layer must be built before its weights can be set.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer

## 3. CNN

In [9]:
def CNN(input_shape):
    """
    Build the CNN text classifier.

    Architecture: GloVe embedding -> three 2D convolutions (with dropout after
    the first two) -> flatten -> two tanh dense layers with dropout -> 50-way
    softmax. The embedded sequence is treated as a one-channel 2D "image" of
    shape (sequence length, embedding size).

    Arguments:
    input_shape -- shape of one input sample, (max_len,); each sample is a
                   sequence of integer word indices

    Returns:
    model -- an uncompiled Keras Model mapping index sequences to a 50-element
             probability vector
    """
    # Integer word indices in, hence dtype int32.
    sentence_indices = Input(shape=input_shape, dtype='int32')

    # Embedding layer pre-loaded with the GloVe vectors.
    embedding_layer = trainable_embedding_layer()
    embeddings = embedding_layer(sentence_indices)

    # Derive the feature-map size from the actual input and embedding instead of
    # hard-coding (110, 200), so other sequence lengths / GloVe sizes also work.
    seq_len = input_shape[0]
    emb_dim = embedding_layer.output_dim

    # Add a trailing channel axis so Conv2D can consume the embeddings.
    embeddings = Reshape((seq_len, emb_dim, 1))(embeddings)

    cnn1 = Conv2D(128, kernel_size=(5, 5), padding='same', kernel_initializer=glorot_uniform(seed=seed))(embeddings)
    cnn1 = Dropout(0.6, seed = seed)(cnn1)
    cnn2 = Conv2D(64, kernel_size=(3, 3), padding='same', kernel_initializer=glorot_uniform(seed=seed))(cnn1)
    cnn2 = Dropout(0.6, seed = seed)(cnn2)
    # 1x1 convolution collapses the 64 channels back to a single channel.
    cnn3 = Conv2D(1, kernel_size=(1, 1), padding='same', kernel_initializer=glorot_uniform(seed=seed))(cnn2)
    features_cnn = Reshape((seq_len, emb_dim))(cnn3)

    flat = Flatten()(features_cnn)

    # Fully connected classifier head ending in a 50-way softmax.
    X = Dense(1024, activation='tanh', kernel_initializer=glorot_uniform(seed))(flat)
    X = Dropout(0.6, seed = seed)(X)
    X = Dense(400, activation='tanh', kernel_initializer=glorot_uniform(seed))(X)
    X = Dropout(0.6, seed = seed)(X)
    X = Dense(50, activation='softmax', kernel_initializer=glorot_uniform(seed))(X)

    # Model that maps sentence_indices to the class probabilities X.
    model = Model(inputs=sentence_indices, outputs=X)

    return model

In [10]:
# Length of every padded input sequence; equals the second dimension of X_train.
maxLen = 110

In [13]:
# Build the network for sequences of maxLen word indices and print its layers.
model = CNN((maxLen, ))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 110)               0         
_________________________________________________________________
embedding_2 (Embedding)      (None, 110, 200)          80000200  
_________________________________________________________________
reshape_3 (Reshape)          (None, 110, 200, 1)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 110, 200, 128)     3328      
_________________________________________________________________
dropout_3 (Dropout)          (None, 110, 200, 128)     0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 110, 200, 64)      73792     
_________________________________________________________________
dropout_4 (Dropout)          (None, 110, 200, 64)      0         
__________

In [12]:
#for i in range(len(model.layers)):
#  print(model.layers[i].get_weights())

In [13]:
#for i in range(len(model.layers)):
#  print(model.layers[i].get_weights())

In [14]:
# Write the model to ../Data/CNN.hdf5 only when the monitored validation
# top-k accuracy improves (save_best_only=True).
checkpointer = ModelCheckpoint(filepath='../Data/CNN.hdf5', monitor='val_top_k_categorical_accuracy', verbose=1, save_best_only=True)

In [15]:
# Adam optimizer with learning rate 0.001 and no learning-rate decay.
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0)

In [16]:
# Metric order matters: values reported after the loss follow this order
# (top-k accuracy first, then top-1 categorical accuracy).
# NOTE(review): top_k_categorical_accuracy presumably uses Keras' default k=5 -- confirm.
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=[metrics.top_k_categorical_accuracy, metrics.categorical_accuracy])

In [None]:
# Train for 20 epochs without shuffling; `history.history` keeps the per-epoch
# loss and metrics used by the plots.
# NOTE(review): the test set is passed as validation data and the checkpoint
# callback selects the best model on it, so the reported test metrics are not
# from held-out data -- confirm this is intended.
history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs = 20, batch_size = 64, verbose = 1, shuffle=False, callbacks=[checkpointer])

Train on 8123 samples, validate on 2061 samples
Epoch 1/20

Epoch 00001: val_top_k_categorical_accuracy improved from -inf to 0.35226, saving model to ../Data/CNN.hdf5
Epoch 2/20

Epoch 00002: val_top_k_categorical_accuracy improved from 0.35226 to 0.48132, saving model to ../Data/CNN.hdf5
Epoch 3/20

Epoch 00003: val_top_k_categorical_accuracy improved from 0.48132 to 0.54003, saving model to ../Data/CNN.hdf5
Epoch 4/20

Epoch 00004: val_top_k_categorical_accuracy improved from 0.54003 to 0.56963, saving model to ../Data/CNN.hdf5
Epoch 5/20

Epoch 00005: val_top_k_categorical_accuracy improved from 0.56963 to 0.58467, saving model to ../Data/CNN.hdf5
Epoch 6/20

Epoch 00006: val_top_k_categorical_accuracy did not improve from 0.58467
Epoch 7/20

Epoch 00007: val_top_k_categorical_accuracy did not improve from 0.58467
Epoch 8/20

Epoch 00008: val_top_k_categorical_accuracy did not improve from 0.58467
Epoch 9/20

In [None]:
# Plot top-1 (categorical) accuracy per epoch for the training and test sets.
plt.figure(figsize=(8, 4), dpi=300)
plt.title("Top 1 Accuracy: Tranning Set vs Test Set")
plt.xlabel("Epoch")
plt.ylabel("Top 1 Accuracy")
plt.plot(history.history['val_categorical_accuracy'], label="Test Set")
plt.plot(history.history['categorical_accuracy'], label="Trainning Set")
plt.legend()
# Save BEFORE plt.show(): with the inline backend show() renders and closes the
# figure, so a savefig() issued afterwards writes an empty canvas.
plt.savefig('CNNTop1.pdf', format='pdf', dpi=300)
plt.show()


### Plot Top 5 Accuracy and Loss

In [None]:
# Plot top-5 accuracy per epoch for the training and test sets.
plt.figure(figsize=(8, 4), dpi=300)
plt.title("Top 5 Accuracy: Tranning Set vs Test Set")
plt.xlabel("Epoch")
plt.ylabel("Top 5 Accuracy")
plt.plot(history.history['top_k_categorical_accuracy'], label="Trainning Set")
plt.plot(history.history['val_top_k_categorical_accuracy'], label="Test Set")
plt.legend()
# Save BEFORE plt.show(): with the inline backend show() renders and closes the
# figure, so a savefig() issued afterwards writes an empty canvas.
plt.savefig('CNNTop5.pdf', format='pdf', dpi=300)
plt.show()

In [None]:
# Plot the loss per epoch for the training and test sets.
plt.figure(figsize=(8, 4), dpi=300)
plt.title("Tranning Loss vs Test Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(history.history['val_loss'], label="Test Set")
plt.plot(history.history['loss'], label="Trainning Set")
plt.legend()
# Save BEFORE plt.show(): with the inline backend show() renders and closes the
# figure, so a savefig() issued afterwards writes an empty canvas.
plt.savefig('CNNLoss.pdf', format='pdf', dpi=300)
plt.show()

### Save History

In [None]:
import pickle

# Persist the per-epoch metrics dict so the results can be inspected without
# retraining. The context manager closes the file even if pickling fails.
with open('CNNHistory', 'wb') as f:
    pickle.dump(history.history, f)

### Load History

In [None]:
import pickle

# Reload the metrics dict written by the "Save History" cell. The file name must
# match the one used there ('CNNHistory'); the previous 'CNNtHistory' was a typo
# that raised FileNotFoundError.
# Only unpickle files produced by this notebook: pickle.load can execute
# arbitrary code from an untrusted file.
with open('CNNHistory', 'rb') as f:
    his = pickle.load(f)

In [None]:
# Report the best validation value of each tracked series together with the
# 0-based epoch index at which it occurred.
val_top_k = his['val_top_k_categorical_accuracy']
val_loss = his['val_loss']
val_ca = his['val_categorical_accuracy']

for label, series, best, best_index in (
    ("top5: ", val_top_k, max, np.argmax),
    ("loss: ", val_loss, min, np.argmin),
    ("top1: ", val_ca, max, np.argmax),
):
    print(label, best(series))
    print(best_index(series))

### Model Save (delete)

In [None]:
#model.save('ServeNet.h5') 

### Model Load

In [None]:
# Replace the in-memory model with the checkpoint written by ModelCheckpoint
# during training.
model = load_model('../Data/CNN.hdf5')

In [None]:
# model.evaluate() returns the loss followed by the compiled metrics in the
# order they were given to compile() -- assuming the loaded model was compiled
# as in the compile cell: top-k accuracy first, then categorical accuracy.
# NOTE: the variable names are historical. `top5error_*` actually holds the
# top-k *accuracy* and `mae_*` holds the top-1 categorical accuracy.
print("Training set:")
loss_train, top5error_train, mae_train = model.evaluate(X_train, Y_train)
print("Training accuracy = ", top5error_train)
print('Test set:')
loss_test, top5error_test, mae_test = model.evaluate(X_test, Y_test)
# This figure is for the test set; it was previously mislabelled "Training accuracy".
print("Test accuracy = ", top5error_test)

In [None]:
# This code allows you to see the mislabelled examples
# C is the number of output classes (matches the 50-unit softmax layer).
C = 50
# y_test_oh = np.eye(C)[Y_test.reshape(-1)]
# X_test_indices = sentences_to_indices(X_test, word_to_index, maxLen)
# Class-probability predictions for every test sample.
predY_test = model.predict(X_test)

In [None]:
# Sanity check: predictions and one-hot labels should have the same shape.
print(predY_test.shape)
print(Y_test.shape)

In [None]:
# Predicted vs. actual class index for the first test sample.
print(np.argmax(predY_test[0]))
print(np.argmax(Y_test[0]))

In [None]:
# Heatmap Data
# Convert probability vectors / one-hot labels to integer class indices.
non_onehot_pred_test = np.argmax(predY_test, axis=1)
non_onehot_Y_test = np.argmax(Y_test, axis=1)
# Display names for the 50 classes, followed by "All" for the crosstab margin.
# NOTE(review): the list order presumably matches the one-hot column order used
# when the dataset was built -- verify against the preprocessing notebook.
categories = [
"eCommerce",
"Photos",
"Stocks",
"Chat",
"Telephony",
"Medical",
"Backend",
"Travel",
"Domains",
"Data",
"Internet of Things",
"Transportation",
"Government",
"Marketing",
"File Sharing",
"Enterprise",
"Cloud",
"Games",
"Financial",
"Weather",
"Payments",
"Science",
"Email",
"Project Management",
"Other",
"Tools",
"Database",
"Storage",
"Banking",
"Application Development",
"Real Estate",
"Bitcoin",
"Messaging",
"Media",
"Security",
"Analytics",
"Entertainment",
"Images",
"Video",
"Sports",
"Education",
"News Services",
"Search",
"Shipping",
"Music",
"Events",
"Reference",
"Social",
"Mapping",
"Advertising", "All"]

# Both should be 1-D arrays with one class index per test sample.
print(non_onehot_pred_test.shape)
print(non_onehot_Y_test.shape)


## Compute the number of correct predictions in each category

In [None]:
# Heat Map

# Confusion matrix of actual (rows) vs. predicted (columns) class indices, with
# an extra "All" row/column holding the totals (margins=True).
cross = pd.crosstab(non_onehot_Y_test, non_onehot_pred_test, rownames=['Actual'], colnames=['Predicted'], margins=True)

# crosstab only creates rows/columns for labels that actually occur. If the
# model never predicts some class, the table would have fewer columns than
# there are category names and the diagonal would no longer line up. Reindex to
# the full label set so the table is always (classes + 1) x (classes + 1);
# fill_value=0 keeps the counts as integers.
all_labels = list(range(Y_test.shape[1])) + ['All']
cross = cross.reindex(index=all_labels, columns=all_labels, fill_value=0)

In [None]:
# Replace the integer class labels (and the margin) with readable category
# names on both axes. Assigning a new index resets its name, so the axis names
# are restored afterwards. `categories` must have exactly as many entries as
# the table has rows/columns.
cross.index = categories
cross.index.name = "Actual"
cross.columns = categories
cross.columns.name = "Predicted"
cross.head()

In [None]:
# Show the relabelled column index.
cross.columns

## Compute category accuracy

In [None]:
# Row totals: the number of actual samples per category (the "All" margin column).
# NOTE(review): this name shadows Python's builtin all(); the same assignment
# is repeated in the following cell.
all = cross["All"]

In [None]:
# Per-category accuracy: correct predictions (the diagonal cell) divided by the
# number of actual samples of that category (the "All" margin column).
# Named `row_totals` rather than `all` so the builtin all() is not shadowed.
row_totals = cross["All"]

# The last row/column of the table is the "All" margin, so the real categories
# occupy positions 0 .. len(cross) - 2.
num_categories = len(cross) - 1

# Use positional .iloc on both lookups: the index holds category names, and
# integer keys on a label-indexed Series rely on a deprecated positional fallback.
result = [cross.iloc[i, i] / row_totals.iloc[i] for i in range(num_categories)]

## Save per-category accuracy to a JSON file

In [None]:
import json

# Pair every category name with its accuracy. zip() stops at the shorter
# sequence, so any surplus entry at the end of `categories` is ignored.
acc_category = {name: accuracy for name, accuracy in zip(categories, result)}

# Serialise first, then write the text in one go.
with open('../Data/cnn_acc_category.json', 'w') as fp:
    fp.write(json.dumps(acc_category))

In [None]:
# Draw the labelled confusion matrix as an annotated heat map and save it as PDF.
f, ax = plt.subplots(figsize=(35, 20))

# Integer annotations in every cell; the colour scale is clipped to [0, 50].
heatmap_options = dict(
    annot=True,
    fmt="d",
    vmin=0,
    vmax=50,
    linewidths=.3,
    cmap=plt.cm.Blues,
)
sns.heatmap(cross, ax=ax, **heatmap_options)

# Move the x-axis ticks and its label to the top of the plot.
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')

# Horizontal row labels, vertical column labels.
plt.xticks(rotation=90)
plt.yticks(rotation=0)

f.savefig('CNNHeatMap.pdf', format='pdf', dpi=300)