In [1]:
#Proprietary content. © Great Learning. All Rights Reserved. Unauthorized use or distribution prohibited.

# Deep learning examples using Keras + Tensorflow: Step 2

# CNN example
### warning: It takes quite some time to build the model

In [6]:
from keras.datasets import cifar10 # subroutines for fetching the CIFAR-10 dataset
from keras.models import Model # basic class for specifying and training a neural network
from keras.layers import Input, Convolution2D, MaxPooling2D, Dense, Dropout, Activation, Flatten
from keras.utils import np_utils # utilities for one-hot encoding of ground truth values
import numpy as np

batch_size = 32 # in each iteration, we consider 32 training examples at once
num_epochs = 2 # we iterate 20 times over the entire training set
kernel_size = 3 # we will use 3x3 kernels throughout
pool_size = 2 # we will use 2x2 pooling throughout
conv_depth_1 = 32 # we will initially have 32 kernels per conv. layer...
conv_depth_2 = 64 # ...switching to 64 after the first pooling layer
drop_prob_1 = 0.25 # dropout after pooling with probability 0.25
drop_prob_2 = 0.5 # dropout in the FC layer with probability 0.5
hidden_size = 512 # the FC layer will have 512 neurons

(X_train, y_train), (X_test, y_test) = cifar10.load_data() # fetch CIFAR-10 data

print(X_train[1])

[[[154 177 187]
  [126 137 136]
  [105 104  95]
  ..., 
  [ 91  95  71]
  [ 87  90  71]
  [ 79  81  70]]

 [[140 160 169]
  [145 153 154]
  [125 125 118]
  ..., 
  [ 96  99  78]
  [ 77  80  62]
  [ 71  73  61]]

 [[140 155 164]
  [139 146 149]
  [115 115 112]
  ..., 
  [ 79  82  64]
  [ 68  70  55]
  [ 67  69  55]]

 ..., 
 [[175 167 166]
  [156 154 160]
  [154 160 170]
  ..., 
  [ 42  34  36]
  [ 61  53  57]
  [ 93  83  91]]

 [[165 154 128]
  [156 152 130]
  [159 161 142]
  ..., 
  [103  93  96]
  [123 114 120]
  [131 121 131]]

 [[163 148 120]
  [158 148 122]
  [163 156 133]
  ..., 
  [143 133 139]
  [143 134 142]
  [143 133 144]]]


In [7]:
num_train, depth, height, width = X_train.shape # there are 50000 training examples in CIFAR-10 
num_test = X_test.shape[0] # there are 10000 test examples in CIFAR-10
num_classes = np.unique(y_train).shape[0] # there are 10 image classes

X_train = X_train.astype('float32') 
X_test = X_test.astype('float32')
X_train /= np.max(X_train) # Normalise data to [0, 1] range
X_test /= np.max(X_train) # Normalise data to [0, 1] range

Y_train = np_utils.to_categorical(y_train, num_classes) # One-hot encode the labels
Y_test = np_utils.to_categorical(y_test, num_classes) # One-hot encode the labels

inp = Input(shape=(depth, height, width)) # N.B. depth goes first in Keras!
# Conv [32] -> Conv [32] -> Pool (with dropout on the pooling layer)
conv_1 = Convolution2D(conv_depth_1, kernel_size, kernel_size, border_mode='same', activation='relu')(inp)
conv_2 = Convolution2D(conv_depth_1, kernel_size, kernel_size, border_mode='same', activation='relu')(conv_1)
pool_1 = MaxPooling2D(pool_size=(pool_size, pool_size))(conv_2)
drop_1 = Dropout(drop_prob_1)(pool_1)
# Conv [64] -> Conv [64] -> Pool (with dropout on the pooling layer)
conv_3 = Convolution2D(conv_depth_2, kernel_size, kernel_size, border_mode='same', activation='relu')(drop_1)
conv_4 = Convolution2D(conv_depth_2, kernel_size, kernel_size, border_mode='same', activation='relu')(conv_3)
pool_2 = MaxPooling2D(pool_size=(pool_size, pool_size))(conv_4)
drop_2 = Dropout(drop_prob_1)(pool_2)
# Now flatten to 1D, apply FC -> ReLU (with dropout) -> softmax
flat = Flatten()(drop_2)
hidden = Dense(hidden_size, activation='relu')(flat)
drop_3 = Dropout(drop_prob_2)(hidden)
out = Dense(num_classes, activation='softmax')(drop_3)

model = Model(input=inp, output=out) # To define a model, just specify its input and output layers

model.compile(loss='categorical_crossentropy', # using the cross-entropy loss function
              optimizer='adam', # using the Adam optimiser
              metrics=['accuracy']) # reporting the accuracy

model.fit(X_train, Y_train, # Train the model using the training set...
          batch_size=batch_size, nb_epoch=num_epochs,
          verbose=1, validation_split=0.1) # ...holding out 10% of the data for validation


Train on 45000 samples, validate on 5000 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1dca1e20a20>

# Test the model after fitting

In [15]:
model.evaluate(X_test, Y_test, verbose=1) # Evaluate the trained model on the test set!



[9.0282660240173342, 0.4294]

In [16]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_2 (InputLayer)             (None, 32, 32, 3)     0                                            
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 32, 32, 32)    896         input_2[0][0]                    
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 32, 32, 32)    9248        convolution2d_1[0][0]            
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 16, 16, 32)    0           convolution2d_2[0][0]            
___________________________________________________________________________________________

# Keras example with text data

In [12]:
#Keras examples with text data
import numpy as np
import logging
import sys
import time

from keras.models import Sequential, Model
from keras.layers import Activation, Dense, Dropout, Embedding, Flatten, Input, Merge, Convolution1D, MaxPooling1D, LSTM

from keras.utils import np_utils
from keras.optimizers import Adam, SGD

import pandas

from sklearn.cross_validation import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

#Function to load the data from input file
def load_data_from_file(input_filename, input_column="Summary", output_column="Labels", max_features=300):
  thedata = pandas.read_csv(input_filename, sep=',', delimiter=',', header='infer', names=None, encoding='latin-1')

  print(thedata.head())

  x_str_summary = thedata[input_column]
  vectorizer = TfidfVectorizer(min_df=1, stop_words='english', max_features=max_features)
  featuresData = vectorizer.fit_transform(x_str_summary)
  x_summary = featuresData.toarray()

  y_str = thedata[output_column]
  cluster_encoder = LabelEncoder()

  y = cluster_encoder.fit_transform(y_str.astype('str'))
  
  return [x_summary, y]

# Training parameters
max_features = 500
num_epochs = 5
batch_size = 5
dimof_middle = 100
dropout = 0.5
verbose = 1
filter_sizes = (3, 4)
num_filters = 100
dropout_prob = (0.25, 0.5)
embedding_dim = 20
val_split = 0.1

# Load data
print("Loading data...")
X, y = load_data_from_file("text_data.csv", max_features=max_features)
X_train, X_test, y_train_cat, y_test_cat = train_test_split(X, y, test_size=0.5)
X_train = X
y_train_cat = y
print("training record count = ", len(X_train), " testing record count = ", len(X_test))
sequence_length = X_train[1].shape[0]

y_train = np_utils.to_categorical(y_train_cat)
y_test = np_utils.to_categorical(y_test_cat)




Loading data...
   IncidentId IncidentType  Category        SubmitDate    ClosedDateTime  \
0           1      Request  hardware  01/06/2015 00:18  09/06/2015 04:14   
1           2      Request  hardware  01/06/2015 00:25  08/06/2015 17:53   
2           3      Request  hardware  01/06/2015 00:32  18/06/2015 03:42   
3           4      Request  hardware  01/06/2015 00:37  12/06/2015 12:01   
4           5      Request   battery  01/06/2015 01:03  01/06/2015 01:27   

     Priority                                           Summary    Labels  \
0  2 - Medium       hardware issue  is reported so please check  hardware   
1     3 - Low  hardware has many issues so returning the mobile  hardware   
2     3 - Low                              hardware is not good  hardware   
3    1 - High                                   hardware issues  hardware   
4     3 - Low                       battery is there. not happy   battery   

   LabelsIndex  
0            1  
1            1  
2            

In [13]:
print('Build model...')

# Get dimensions of input and output
dimof_input = embedding_dim
dimof_output = np.max(y_train_cat) + 1

print('dimof_input: ', dimof_input)
print('dimof_output: ', dimof_output)

# main sequential model
model = Sequential()
model.add(Embedding(max_features, embedding_dim, input_length=sequence_length,
                        weights=None))

model.add(Dropout(dropout_prob[0], input_shape=(sequence_length, embedding_dim)))
#model.add(graph)
model.add(LSTM(output_dim=dimof_middle, activation='sigmoid', inner_activation='hard_sigmoid'))
model.add(Dense(dimof_middle))
model.add(Dropout(dropout))
model.add(Dense(dimof_middle, activation='relu'))
model.add(Dropout(dropout))
model.add(Dense(dimof_middle, activation='relu'))
model.add(Dropout(dropout))
model.add(Dense(dimof_output, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])



Build model...
dimof_input:  20
dimof_output:  3


In [14]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_3 (Embedding)          (None, 15, 20)        10000       embedding_input_3[0][0]          
____________________________________________________________________________________________________
dropout_12 (Dropout)             (None, 15, 20)        0           embedding_3[0][0]                
____________________________________________________________________________________________________
lstm_2 (LSTM)                    (None, 100)           48400       dropout_12[0][0]                 
____________________________________________________________________________________________________
dense_11 (Dense)                 (None, 100)           10100       lstm_2[0][0]                     
___________________________________________________________________________________________

In [15]:
start_time = time.time()
results = model.fit(X_train, y_train, batch_size=batch_size,
          nb_epoch=num_epochs, validation_split=val_split, verbose=1)

                    
average_time_per_epoch = (time.time() - start_time) / num_epochs

#print("Baseline: %.2f%%" % (results.history['val_acc'][0]*100))

# Evaluate
loss, accuracy = model.evaluate(X_test, y_test, verbose=verbose)
print('loss: ', loss)
print('accuracy: ', accuracy)
print()

# Predict
y_pred = model.predict_classes(X_test, batch_size=batch_size, verbose=verbose)

test_acc = round(accuracy_score(y_test_cat, y_pred)*100)
print("Test accuracy:", test_acc)

print(y_pred)
for i in range(len(X_test)):
    preds = y_pred[i]
    print(i, preds, y_test_cat[i])

Train on 18 samples, validate on 2 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loss:  0.996205985546
accuracy:  0.5

[2 2 2 2 2 2 2 2 2 2]
0 2 0
1 2 2
2 2 0
3 2 0
4 2 2
5 2 2
6 2 0
7 2 2
8 2 2
9 2 0


In [18]:
# Building model
# ==================================================
#
# graph subnet with one input and one output,
# convolutional layers concateneted in parallel
graph_in = Input(shape=(sequence_length, embedding_dim))
convs = []
for fsz in filter_sizes:
    conv = Convolution1D(nb_filter=num_filters,
                         filter_length=fsz,
                         border_mode='valid',
                         activation='relu',
                         subsample_length=1)(graph_in)
    pool = MaxPooling1D(pool_length=2)(conv)
    flatten = Flatten()(pool)
    convs.append(flatten)
    
if len(filter_sizes)>1:
    out = Merge(mode='concat')(convs)
else:
    out = convs[0]

graph = Model(input=graph_in, output=out)

print('Build model...')

# Get dimensions of input and output
dimof_input = embedding_dim
dimof_output = np.max(y_train_cat) + 1

print('dimof_input: ', dimof_input)
print('dimof_output: ', dimof_output)

# main sequential model
convmodel = Sequential()
convmodel.add(Embedding(max_features, embedding_dim, input_length=sequence_length,
                        weights=None))

convmodel.add(Dropout(dropout_prob[0], input_shape=(sequence_length, embedding_dim)))
convmodel.add(graph)
convmodel.add(Dense(dimof_middle))
convmodel.add(Dropout(dropout))
convmodel.add(Dense(dimof_middle, activation='relu'))
convmodel.add(Dropout(dropout))
convmodel.add(Dense(dimof_middle, activation='relu'))
convmodel.add(Dropout(dropout))
convmodel.add(Dense(dimof_output, activation='softmax'))

convmodel.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])



Build model...
dimof_input:  20
dimof_output:  3


In [19]:
convmodel.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_5 (Embedding)          (None, 15, 20)        10000       embedding_input_5[0][0]          
____________________________________________________________________________________________________
dropout_20 (Dropout)             (None, 15, 20)        0           embedding_5[0][0]                
____________________________________________________________________________________________________
model_6 (Model)                  (None, 1200)          14200       dropout_20[0][0]                 
____________________________________________________________________________________________________
dense_19 (Dense)                 (None, 100)           120100      model_6[1][0]                    
___________________________________________________________________________________________

In [20]:
graph.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_6 (InputLayer)             (None, 15, 20)        0                                            
____________________________________________________________________________________________________
convolution1d_9 (Convolution1D)  (None, 13, 100)       6100        input_6[0][0]                    
____________________________________________________________________________________________________
convolution1d_10 (Convolution1D) (None, 12, 100)       8100        input_6[0][0]                    
____________________________________________________________________________________________________
maxpooling1d_9 (MaxPooling1D)    (None, 6, 100)        0           convolution1d_9[0][0]            
___________________________________________________________________________________________

In [21]:
start_time = time.time()
results = convmodel.fit(X_train, y_train, batch_size=batch_size,
          nb_epoch=num_epochs, validation_split=val_split, verbose=1)

                    
average_time_per_epoch = (time.time() - start_time) / num_epochs

#print("Baseline: %.2f%%" % (results.history['val_acc'][0]*100))

# Evaluate
loss, accuracy = convmodel.evaluate(X_test, y_test, verbose=verbose)
print('loss: ', loss)
print('accuracy: ', accuracy)
print()

# Predict
y_pred = convmodel.predict_classes(X_test, batch_size=batch_size, verbose=verbose)

test_acc = round(accuracy_score(y_test_cat, y_pred)*100)
print("Test accuracy:", test_acc)

print(y_pred)
for i in range(len(X_test)):
    preds = y_pred[i]
    print(i, preds, y_test_cat[i])

Train on 18 samples, validate on 2 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loss:  1.06026983261
accuracy:  0.5

[2 2 2 2 2 2 2 2 2 2]
0 2 0
1 2 2
2 2 0
3 2 0
4 2 2
5 2 2
6 2 0
7 2 2
8 2 2
9 2 0
