In [1]:
import numpy as np, pandas as pd, tensorflow as tf

  return f(*args, **kwds)


# Dataset Loading

#### Classify on Stars

In [2]:
## Amazon Reviews
## https://www.kaggle.com/snap/amazon-fine-food-reviews
reviews = pd.read_csv('data/Reviews.csv')
reviews.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 568454 entries, 0 to 568453
Data columns (total 10 columns):
Id                        568454 non-null int64
ProductId                 568454 non-null object
UserId                    568454 non-null object
ProfileName               568438 non-null object
HelpfulnessNumerator      568454 non-null int64
HelpfulnessDenominator    568454 non-null int64
Score                     568454 non-null int64
Time                      568454 non-null int64
Summary                   568427 non-null object
Text                      568454 non-null object
dtypes: int64(5), object(5)
memory usage: 43.4+ MB


In [3]:
try:
    del reviews['ProfileName']
except KeyError:
    print('No such column')
    
try:
    del reviews['Summary']
except KeyError:
    print('No such column')

In [4]:
#Remove rows where numerator > denominator. Assume this is entry error in dataset.
faulty_rows = reviews[reviews['HelpfulnessNumerator'] > reviews['HelpfulnessDenominator']]
reviews = reviews[reviews['HelpfulnessNumerator'] <= reviews['HelpfulnessDenominator']]

faulty_rows

Unnamed: 0,Id,ProductId,UserId,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Text
44736,44737,B001EQ55RW,A2V0I904FH7ABY,3,2,4,1212883200,It was almost a 'love at first bite' - the per...
64421,64422,B000MIDROQ,A161DK06JJMCYF,3,1,5,1224892800,My son loves spaghetti so I didn't hesitate or...


In [5]:
import math
# [CITE] https://www.evanmiller.org/how-not-to-sort-by-average-rating.html
# "The lower bound of Wilson score confidence interval for a Bernoulli parameter"
def lbc(positive_votes, total_votes):
    negative_votes = total_votes - positive_votes
    if total_votes == 0:
        return 0.0
    lower_bound = ((positive_votes + 1.9208) / (total_votes) - 1.96 * math.sqrt((total_votes * negative_votes) / (total_votes) + 0.9604) / 
        (total_votes)) / (1 + 3.8416 / (total_votes))
    return lower_bound

### Modify Dataset

In [6]:
%%time

# Add the Lbc column
lbc_for_row = lambda row: lbc(row['HelpfulnessNumerator'], row['HelpfulnessDenominator'])
reviews['Lbc'] = reviews.apply(lbc_for_row, axis=1)

# Make Score zero indexed
reviews['Score'] = reviews['Score']-1

CPU times: user 8.38 s, sys: 43.4 ms, total: 8.43 s
Wall time: 8.42 s


## Reduce Dataset Size

In [7]:
data = reviews.sample(frac=1)
data.head()

Unnamed: 0,Id,ProductId,UserId,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Text,Lbc
395895,395896,B001CWSKFC,A197OKILYY934X,0,0,4,1298592000,I recently discovered these at a local Walmart...,0.0
9837,9838,B003W07D1E,A27CKOACMX8O5F,0,1,1,1301356800,Love the cinnamon taste but way too sweet. I g...,-0.170084
217617,217618,B000LQLV7E,A5O3M8WH3WLMW,0,0,4,1346371200,good shipment system<br />nice taste<br />hot ...,0.0
39973,39974,B001TZJ3OE,A20824UL50NSJB,0,0,4,1167782400,kids love the sauce on many things... great on...,0.0
328259,328260,B0001AO9LA,A3EIE7D4PET3B6,0,0,4,1346889600,"I don't know what other reviewers received, bu...",0.0


## Preprocessing

In [8]:
quartile = .85
review_length = int(data['Text'].str.len().quantile(q=quartile))

longest = data[data['Text'].str.len() == review_length]
print("Length of quartile", review_length)
longest['Text'].tolist()[0]

Length of quartile 715


"I generally like vanilla flavoured beverages but I always take coffee black and unsweetened, I thought this would be a nice change and a combination of the 2.  Not so.  It just tasted .. well, wrong.  It tasted like weak custard which I guess is evidence that the french vanilla flavour is there but, for me, custard should remain a topping for apple pie, not in my coffee... LOL!  I do like Folger's Cappuccino flavoured beverages but as they are loaded with sugar I don't drink them that often and if I do I like them with cold with blended ice.  I have the hazelnut in this too which I find more palatable but I guess I will stick to straight black coffee from now on... my favourite being Illy Espresso.. yummy!"

### Tokenize the dataset

In [9]:
%%time
import keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

NUM_TOP_WORDS = 120000
MAX_ART_LEN = int(data['Text'].str.len().quantile(q=quartile)) # maximum and minimum number of words 
                                                               #  based on a quartile of review length

tokenizer = Tokenizer(num_words=NUM_TOP_WORDS)
tokenizer.fit_on_texts(data.Text)
sequences = tokenizer.texts_to_sequences(data.Text)

word_index = tokenizer.word_index
NUM_TOP_WORDS = len(word_index) if NUM_TOP_WORDS==None else NUM_TOP_WORDS
top_words = min((len(word_index),NUM_TOP_WORDS))
print('Found %s unique tokens. Distilled to %d top words.' % (len(word_index),top_words))

X = pad_sequences(sequences, maxlen=MAX_ART_LEN)

y_ohe = keras.utils.to_categorical(data['Score'])
print('Shape of data tensor:', X.shape)
print('Shape of label tensor:', y_ohe.shape)
print(np.max(X))

Using TensorFlow backend.


Found 133038 unique tokens. Distilled to 120000 top words.
Shape of data tensor: (568452, 715)
Shape of label tensor: (568452, 5)
119999
CPU times: user 42.9 s, sys: 780 ms, total: 43.7 s
Wall time: 43.6 s


In [10]:
from sklearn.model_selection import train_test_split
# Split it into train / test subsets
X_train, X_test, y_train_ohe, y_test_ohe = train_test_split(X, y_ohe, test_size=0.2,
                                                            stratify=data['Score'], 
                                                            random_state=42)
NUM_CLASSES = y_ohe.shape[1]
print(X_train.shape,y_train_ohe.shape)
print(np.sum(y_train_ohe,axis=0))

(454761, 715) (454761, 5)
[  41814.   23815.   34112.   64523.  290497.]


### Load the embeding

In [16]:
%%time

EMBED_SIZE = 100
# the embed size should match the file you load glove from
embeddings_index = {}
f = open('embeddings/glove.6B.100d.txt')
# save key/array pairs of the embeddings
#  the key of the dictionary is the word, the array is the embedding
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

print('Found %s word vectors.' % len(embeddings_index))

# now fill in the matrix, using the ordering from the
#  keras word tokenizer from before
embedding_matrix = np.zeros((len(word_index) + 1, EMBED_SIZE))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

print(embedding_matrix.shape)

# Define the embeding layer
from keras.layers import Embedding

embedding_layer1 = Embedding(len(word_index) + 1,
                            EMBED_SIZE,
                            weights=[embedding_matrix],
                            input_length=MAX_ART_LEN,
                            trainable=True)

embedding_layer2 = Embedding(len(word_index) + 1,
                            EMBED_SIZE,
                            weights=[embedding_matrix],
                            input_length=MAX_ART_LEN,
                            trainable=True)

embedding_layer3 = Embedding(len(word_index) + 1,
                            EMBED_SIZE,
                            weights=[embedding_matrix],
                            input_length=MAX_ART_LEN,
                            trainable=True)

Found 400000 word vectors.
(133039, 100)
CPU times: user 7.18 s, sys: 82.2 ms, total: 7.27 s
Wall time: 7.2 s


## Network 1

In [12]:
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D # Convolution Layers
from keras.layers import Dense                # Dense Layers
from keras.layers import GRU                  # Recurrent Layers

rnn1 = Sequential()
rnn1.add(embedding_layer1)
rnn1.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
rnn1.add(MaxPooling1D(pool_size=2))
rnn1.add(GRU(100,dropout=0.2, recurrent_dropout=0.2))
rnn1.add(Dense(NUM_CLASSES, activation='sigmoid'))
rnn1.compile(loss='categorical_crossentropy',
              optimizer='Adam', 
              metrics=['accuracy'])
print(rnn1.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 715, 100)          13303900  
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 715, 32)           9632      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 357, 32)           0         
_________________________________________________________________
gru_1 (GRU)                  (None, 100)               39900     
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 505       
Total params: 13,353,937
Trainable params: 50,037
Non-trainable params: 13,303,900
_________________________________________________________________
None


In [13]:
%%time

history1 = rnn1.fit(X_train, y_train_ohe, validation_data=(X_test, y_test_ohe), epochs=2, batch_size=32)

Train on 454761 samples, validate on 113691 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
CPU times: user 4h 57min 53s, sys: 13min 48s, total: 5h 11min 42s
Wall time: 3h 26min 27s


<keras.callbacks.History at 0x7f44ef714e48>

In [14]:
y_hat = np.argmax(rnn1.predict(X_test), axis=1)

In [15]:
from sklearn.metrics import confusion_matrix

y_test = np.argmax(y_test_ohe, axis=1)

cm = confusion_matrix(y_test, y_hat)
print(cm)

[[ 8280   309   496   103  1266]
 [ 2909   560  1150   323  1012]
 [ 1727   466  2599  1514  2222]
 [  695   101  1245  3969 10121]
 [ 1443    63   622  2097 68399]]


# Results 
#### 10% of Dataset
Train on 45476 samples, validate on 11369 samples

Epoch 1/4
45476/45476 [==============================] - 451s 10ms/step - loss: 1.0291 - acc: 0.6508 - val_loss: 0.8893 - val_acc: 0.6783

Epoch 2/4
45476/45476 [==============================] - 448s 10ms/step - loss: 0.8846 - acc: 0.6804 - val_loss: 0.8173 - val_acc: 0.6968

Epoch 3/4
45476/45476 [==============================] - 453s 10ms/step - loss: 0.8355 - acc: 0.6923 - val_loss: 0.7954 - val_acc: 0.7038

Epoch 4/4
45476/45476 [==============================] - 442s 10ms/step - loss: 0.8052 - acc: 0.6997 - val_loss: 0.7769 - val_acc: 0.7107

CPU times: user 1h 44min 42s, sys: 27min 2s, total: 2h 11min 45s
Wall time: 29min 56s
```
[[ 594    2  138   26  282]
 [ 187    5  183   58  165]
 [  87    4  265  182  316]
 [  32    1  152  315 1107]
 [  58    0  103  206 6901]]
```
#### 25% of Dataset
Train on 113690 samples, validate on 28423 samples

Epoch 1/4
113690/113690 [==============================] - 785s 7ms/step - loss: 0.9327 - acc: 0.6701 - val_loss: 0.8315 - val_acc: 0.6937

Epoch 2/4
113690/113690 [==============================] - 785s 7ms/step - loss: 0.8090 - acc: 0.7014 - val_loss: 0.7651 - val_acc: 0.7136

Epoch 3/4
113690/113690 [==============================] - 784s 7ms/step - loss: 0.7749 - acc: 0.7120 - val_loss: 0.7545 - val_acc: 0.7154

Epoch 4/4
113690/113690 [==============================] - 784s 7ms/step - loss: 0.7517 - acc: 0.7191 - val_loss: 0.7304 - val_acc: 0.7266

CPU times: user 1h 13min 8s, sys: 3min 12s, total: 1h 16min 21s
Wall time: 52min 17s
```
[[ 2006    61    99    24   411]
 [  683   137   236    63   373]
 [  491   129   450   293   768]
 [  191    43   255   731  2820]
 [  350    23   141   318 17327]]
 ```
 #### 50% of Dataset
 Train on 227380 samples, validate on 56846 samples
 
Epoch 1/4
227380/227380 [==============================] - 1547s 7ms/step - loss: 0.8767 - acc: 0.6832 - val_loss: 0.7632 - val_acc: 0.7135

Epoch 2/4
227380/227380 [==============================] - 1550s 7ms/step - loss: 0.7758 - acc: 0.7119 - val_loss: 0.7332 - val_acc: 0.7257

Epoch 3/4
227380/227380 [==============================] - 1554s 7ms/step - loss: 0.7447 - acc: 0.7217 - val_loss: 0.7117 - val_acc: 0.7352

Epoch 4/4
227380/227380 [==============================] - 1555s 7ms/step - loss: 0.7239 - acc: 0.7285 - val_loss: 0.7185 - val_acc: 0.7317

CPU times: user 2h 30min, sys: 7min 19s, total: 2h 37min 20s
Wall time: 1h 43min 26s
```
[[ 3375   348   204    40  1231]
 [  917   517   424   120  1007]
 [  426   332   933   578  1988]
 [  152    47   351  1209  6286]
 [  257    46   131   364 35563]]
 ```
 #### 100% of Dataset
 Train on 454761 samples, validate on 113691 samples
 
Epoch 1/4
454761/454761 [==============================] - 3094s 7ms/step - loss: 0.8326 - acc: 0.6952 - val_loss: 0.7404 - val_acc: 0.7233

Epoch 2/4
454761/454761 [==============================] - 3096s 7ms/step - loss: 0.7582 - acc: 0.7169 - val_loss: 0.7318 - val_acc: 0.7235

Epoch 3/4
454761/454761 [==============================] - 3098s 7ms/step - loss: 0.7566 - acc: 0.7174 - val_loss: 0.7054 - val_acc: 0.7344

Epoch 4/4
454761/454761 [==============================] - 3099s 7ms/step - loss: 0.7325 - acc: 0.7254 - val_loss: 0.6979 - val_acc: 0.7371

CPU times: user 4h 57min 53s, sys: 13min 48s, total: 5h 11min 42s
Wall time: 3h 26min 27s
```
[[ 8280   309   496   103  1266]
 [ 2909   560  1150   323  1012]
 [ 1727   466  2599  1514  2222]
 [  695   101  1245  3969 10121]
 [ 1443    63   622  2097 68399]]
 ```

## Network 2

In [12]:
from keras.models import Sequential
from keras.layers import BatchNormalization
from keras.layers import Conv1D, MaxPooling1D # Convolution Layers
from keras.layers import Dense                # Dense Layers
from keras.layers import LSTM                 # Recurrent Layers

rnn2 = Sequential()
rnn2.add(embedding_layer2)
rnn2.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
rnn2.add(MaxPooling1D(pool_size=2))
rnn2.add(Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'))
rnn2.add(MaxPooling1D(pool_size=2))
rnn2.add(Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'))
rnn2.add(MaxPooling1D(pool_size=2))
rnn2.add(BatchNormalization())
rnn2.add(LSTM(100,dropout=0.25, recurrent_dropout=0.2, unroll=True))
rnn2.add(Dense(64))
rnn2.add(Dense(64))
rnn2.add(Dense(NUM_CLASSES, activation='sigmoid'))
rnn2.compile(loss='categorical_crossentropy',
              optimizer='Adam', 
              metrics=['accuracy'])
print(rnn2.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 715, 100)          13303900  
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 715, 32)           9632      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 357, 32)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 357, 64)           6208      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 178, 64)           0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 178, 64)           12352     
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 89, 64)            0         
__________

In [13]:
%%time

history2 = rnn2.fit(X_train, y_train_ohe, validation_data=(X_test, y_test_ohe), epochs=2, batch_size=32)

Train on 454761 samples, validate on 113691 samples
Epoch 1/2

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 2/2
CPU times: user 35min 34s, sys: 1min 39s, total: 37min 14s
Wall time: 34min 55s


In [14]:
y_hat = np.argmax(rnn2.predict(X_test), axis=1)

In [15]:
from sklearn.metrics import confusion_matrix

y_test = np.argmax(y_test_ohe, axis=1)

cm = confusion_matrix(y_test, y_hat)
print(cm)

[[ 7739  1300   605   123   687]
 [ 1551  2106  1584   291   422]
 [  558   949  4319  1738   964]
 [  224   165  1408  8047  6287]
 [  507   163   747  5231 65976]]


In [19]:
err = y_test-y_hat

In [20]:
print(err)

[1 4 0 ..., 0 0 0]


# Results 
#### 10% of Dataset
Train on 45476 samples, validate on 11369 samples

Epoch 1/4
45476/45476 [==============================] - 237s 5ms/step - loss: 0.9874 - acc: 0.6574 - val_loss: 0.9331 - val_acc: 0.6636

Epoch 2/4
45476/45476 [==============================] - 236s 5ms/step - loss: 0.8625 - acc: 0.6846 - val_loss: 0.8611 - val_acc: 0.6892

Epoch 3/4
45476/45476 [==============================] - 237s 5ms/step - loss: 0.8107 - acc: 0.6965 - val_loss: 0.8028 - val_acc: 0.6973

Epoch 4/4
45476/45476 [==============================] - 236s 5ms/step - loss: 0.7780 - acc: 0.7080 - val_loss: 0.8506 - val_acc: 0.6994

CPU times: user 56min 14s, sys: 14min 44s, total: 1h 10min 59s
Wall time: 15min 49s
```
[[ 616   11   42   10  363]
 [ 198   10   64   39  287]
 [ 104    2   99   77  572]
 [  42    1   33   68 1463]
 [  58    0   25   27 7158]]
```
#### 25% of Dataset
Train on 113690 samples, validate on 28423 samples

Epoch 1/4
113690/113690 [==============================] - 274s 2ms/step - loss: 0.9015 - acc: 0.6761 - val_loss: 0.8047 - val_acc: 0.6987

Epoch 2/4
113690/113690 [==============================] - 273s 2ms/step - loss: 0.7932 - acc: 0.7021 - val_loss: 0.7815 - val_acc: 0.7025

Epoch 3/4
113690/113690 [==============================] - 273s 2ms/step - loss: 0.7555 - acc: 0.7145 - val_loss: 0.8046 - val_acc: 0.6960

Epoch 4/4
113690/113690 [==============================] - 273s 2ms/step - loss: 0.7293 - acc: 0.7245 - val_loss: 0.7831 - val_acc: 0.7154

CPU times: user 26min 3s, sys: 1min 13s, total: 27min 16s
Wall time: 18min 13s
```
[[ 1551    62    85    23   880]
 [  509   110   230    70   573]
 [  237    91   373   326  1104]
 [   78    28   177   599  3158]
 [  128    13    83   233 17702]]
 ```
 #### 50% of Dataset
 Train on 227380 samples, validate on 56846 samples
 
Epoch 1/4
227380/227380 [==============================] - 543s 2ms/step - loss: 0.8495 - acc: 0.6899 - val_loss: 0.7786 - val_acc: 0.7036

Epoch 2/4
227380/227380 [==============================] - 545s 2ms/step - loss: 0.7544 - acc: 0.7170 - val_loss: 0.7779 - val_acc: 0.6993

Epoch 3/4
227380/227380 [==============================] - 545s 2ms/step - loss: 0.7212 - acc: 0.7283 - val_loss: 0.7270 - val_acc: 0.7259

Epoch 4/4
227380/227380 [==============================] - 544s 2ms/step - loss: 0.6999 - acc: 0.7352 - val_loss: 0.7443 - val_acc: 0.7299

CPU times: user 50min 21s, sys: 2min 18s, total: 52min 40s
Wall time: 36min 18s
```
[[ 3708    70   204    67  1149]
 [ 1245   127   454   194   965]
 [  618   104   753   865  1917]
 [  173    18   270  1570  6014]
 [  313     4   102   606 35336]]
 ```
 #### 100% of Dataset
 Train on 454761 samples, validate on 113691 samples
 
Epoch 1/4
454761/454761 [==============================] - 1085s 2ms/step - loss: 0.8098 - acc: 0.7001 - val_loss: 0.7436 - val_acc: 0.7193

Epoch 2/4
454761/454761 [==============================] - 1089s 2ms/step - loss: 0.7243 - acc: 0.7271 - val_loss: 0.7504 - val_acc: 0.7266

Epoch 3/4
454761/454761 [==============================] - 1089s 2ms/step - loss: 0.6966 - acc: 0.7364 - val_loss: 0.7103 - val_acc: 0.7338

Epoch 4/4
454761/454761 [==============================] - 1089s 2ms/step - loss: 0.6782 - acc: 0.7429 - val_loss: 0.6988 - val_acc: 0.7354

CPU times: user 1h 44min 8s, sys: 4min 53s, total: 1h 49min 1s
Wall time: 1h 12min 31s
```
[[ 6899   726  1663    93  1073]
 [ 1535   781  2684   249   705]
 [  728   329  4397  1602  1472]
 [  266    47  2311  4983  8524]
 [  792    44  1657  3577 66554]]
 ```
 ## Training Embedding
 #### 2 Epochs
 Train on 454761 samples, validate on 113691 samples
 
Epoch 1/2
454761/454761 [==============================] - 1038s 2ms/step - loss: 0.7174 - acc: 0.7310 - val_loss: 0.6536 - val_acc: 0.7576

Epoch 2/2
454761/454761 [==============================] - 1035s 2ms/step - loss: 0.5844 - acc: 0.7799 - val_loss: 0.5877 - val_acc: 0.7829

CPU times: user 35min 26s, sys: 1min 36s, total: 37min 2s
Wall time: 34min 43s
```
[[ 8332   882   598    50   592]
 [ 2046  1847  1444   209   408]
 [  791   827  4332  1484  1094]
 [  286   140  1557  6656  7492]
 [  762   130   839  3054 67839]]
 ```
 #### 4 Epochs
Train on 454761 samples, validate on 113691 samples

Epoch 1/4
454761/454761 [==============================] - 1039s 2ms/step - loss: 0.7166 - acc: 0.7314 - val_loss: 0.6305 - val_acc: 0.7638

Epoch 2/4
454761/454761 [==============================] - 1036s 2ms/step - loss: 0.5836 - acc: 0.7805 - val_loss: 0.5858 - val_acc: 0.7810

Epoch 3/4
454761/454761 [==============================] - 1036s 2ms/step - loss: 0.5079 - acc: 0.8119 - val_loss: 0.5684 - val_acc: 0.7955

Epoch 4/4
454761/454761 [==============================] - 1036s 2ms/step - loss: 0.4453 - acc: 0.8384 - val_loss: 0.5913 - val_acc: 0.7899

CPU times: user 1h 10min 36s, sys: 3min 19s, total: 1h 13min 56s
Wall time: 1h 9min 17s
```
[[ 7640  1949   308    59   498]
 [ 1124  3690   665   112   363]
 [  604  1982  4115   965   862]
 [  274   498  1659  7581  6119]
 [  734   537  1011  3568 66774]]

In [34]:
from keras.models import Sequential
from keras.layers import BatchNormalization
from keras.layers import Conv1D, MaxPooling1D # Convolution Layers
from keras.layers import Dense                # Dense Layers
from keras.layers import LSTM                 # Recurrent Layers

rnn3 = Sequential()
rnn3.add(embedding_layer3)
rnn3.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
rnn3.add(MaxPooling1D(pool_size=2))
rnn3.add(Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'))
rnn3.add(MaxPooling1D(pool_size=2))
rnn3.add(Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'))
rnn3.add(MaxPooling1D(pool_size=2))
rnn3.add(BatchNormalization())
rnn3.add(LSTM(100,dropout=0.25, recurrent_dropout=0.2, unroll=True))
rnn3.add(Dense(64))
rnn3.add(Dense(64))
rnn3.add(Dense(1, activation='sigmoid'))
rnn3.compile(loss='mean_squared_error',
              optimizer='Adam', 
              metrics=['accuracy'])
print(rnn3.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 715, 100)          13303900  
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 715, 32)           9632      
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 357, 32)           0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 357, 64)           6208      
_________________________________________________________________
max_pooling1d_5 (MaxPooling1 (None, 178, 64)           0         
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 178, 64)           12352     
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 (None, 89, 64)            0         
__________

In [35]:
y_train_ordinal = np.argmax(y_train_ohe, axis=1)/5
y_test_ordinal = y_test/5

In [36]:
history3 = rnn3.fit(X_train, y_train_ordinal, validation_data=(X_test, y_test_ordinal), epochs=2, batch_size=32)

Train on 454761 samples, validate on 113691 samples
Epoch 1/2
Epoch 2/2


In [40]:
y_hat = rnn3.predict(X_test)

In [45]:
print(y_test_ordinal.mean())
print(y_hat.mean())

0.636636145341
0.640719


In [46]:
err = y_test_ordinal-y_hat

MemoryError: 