In [2]:
import tensorflow as tf
# Turn on memory growth for every visible GPU, then report how many physical
# and logical GPUs TensorFlow sees. (This block used to appear twice verbatim
# in the cell; one pass is sufficient.)
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

1 Physical GPUs, 1 Logical GPUs


In [4]:
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re
import numpy as np
import random
import nltk
from nltk.corpus import stopwords
from nltk.corpus import wordnet 
from random import sample
import string

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [5]:
# Load the IMDB reviews CSV from the working directory; later cells read its
# 'review' and 'sentiment' columns.
data = pd.read_csv('./IMDB Dataset.csv')

In [6]:
# Hyper-parameters for the RNN, looked up by key in the cells below.

rnn_hyper_param = dict(
    VOCAB_SIZE=50000,
    EPOCHS=150,
    BS=512,
    LR=0.005,
    OOV_TOK="<OOV>",
    MAX_LENGTH=520,
    PADDING_TYPE="post",
    TRUNC_TYPE="post",
)

In [7]:
# Hyper-parameters for the CNN, looked up by key in the cells below.

cnn_hyper_param = dict(
    VOCAB_SIZE=50000,
    EPOCHS=150,
    BS=512,
    LR=0.001,
    OOV_TOK="<OOV>",
    MAX_LENGTH=520,
    PADDING_TYPE="post",
    TRUNC_TYPE="post",
)

In [8]:
# Hold out 20% of the reviews as the test set. The split is stratified on the
# sentiment label and uses a fixed random_state so it is the same on every run.

x_train, x_test, y_train, y_test = train_test_split(
    data['review'],
    data['sentiment'],
    stratify=data['sentiment'],
    test_size=0.2,
    random_state=0,
)

In [9]:
# Tokenize using the training set vocabulary only.

tokenizer = Tokenizer(num_words=rnn_hyper_param['VOCAB_SIZE'], oov_token=rnn_hyper_param['OOV_TOK'])
tokenizer.fit_on_texts(x_train)
word_index = tokenizer.word_index

# Create sequences (i.e. apply Tokenized dictionary) and pad/truncate them to
# the fixed MAX_LENGTH.
test_sequences = tokenizer.texts_to_sequences(x_test)
testing_padded = pad_sequences(test_sequences, padding=rnn_hyper_param['PADDING_TYPE'],
                               maxlen=rnn_hyper_param['MAX_LENGTH'], truncating=rnn_hyper_param['TRUNC_TYPE'])

# Convert categorical labels to 1 ('positive') / 0 (anything else).
# An already-encoded 1 is also accepted as positive so the cell is safe to
# re-run: the previous loop mapped every value other than the string
# 'positive' to 0, so executing the cell a second time zeroed all labels.
y_test = np.array([1 if label == 'positive' or label == 1 else 0 for label in y_test])

In [10]:
# Rebind `data` to a DataFrame that contains only the training split
# (from here on `data` no longer refers to the full dataset).
data = pd.DataFrame({'review': x_train, 'sentiment': y_train})


In [11]:
# Iteratively split the train set into subsets with different sizes. This is used for
# testing the impact of training with lesser data with and without data augmentation.
#
# For each fraction, the "test" side of a stratified train_test_split over the
# training DataFrame is kept as the subset, stored in data_dict under that fraction.
#
# The subset is bound to loop-local names instead of x_train / y_train, so this
# cell no longer overwrites the full training split with the last (90%) subset.

data_dict = {}
split_size = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
for split in split_size:
    _, subset_reviews, _, subset_labels = train_test_split(data['review'], data['sentiment'],
                                                           test_size=split, random_state=0,
                                                           stratify=data['sentiment'])
    data_dict[split] = pd.DataFrame({
        'review': subset_reviews,
        'sentiment': subset_labels
    })


# <center>Data Augmentation</center>

In [12]:
# function to create the best CNN model we found through grid search

def create_cnn():
    """
    Build and compile a fresh CNN sentiment classifier.

    Architecture: Embedding -> two (Conv1D, MaxPooling1D, Dropout) stages ->
    Flatten -> Dense(20, relu) -> Dropout -> Dense(1, sigmoid).
    Compiled with binary cross-entropy, accuracy as metric, and Adam using
    the learning rate in cnn_hyper_param['LR'].

    Reads the module-level `word_index` and `cnn_hyper_param`.

    Returns:
        The compiled keras.Sequential model (untrained).
    """
    # NOTE(review): the embedding has len(word_index)+1 rows, i.e. one per token
    # in word_index, while the tokenizer is created with num_words=VOCAB_SIZE —
    # presumably rows past that cap are never looked up; confirm before resizing.
    model = keras.Sequential([
    # Sequence length comes from the same hyper-parameter used for padding,
    # rather than a second hard-coded 520.
    keras.layers.Embedding(len(word_index)+1, 30, input_length=cnn_hyper_param['MAX_LENGTH']),
    
    keras.layers.Conv1D(40, 3, activation='relu'),
    keras.layers.MaxPooling1D(padding='same'),
    keras.layers.Dropout(0.3),
    
    keras.layers.Conv1D(5, 3, activation='relu'),
    keras.layers.MaxPooling1D(padding='same'),
    keras.layers.Dropout(0.3),
    
    keras.layers.Flatten(),
    keras.layers.Dense(20, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(loss='binary_crossentropy', 
                  optimizer=keras.optimizers.Adam(learning_rate=cnn_hyper_param['LR']), 
                  metrics=['accuracy'])
    return model

In [13]:
# function to create the best RNN model we found through grid search

def create_rnn():
    """
    Build and compile a fresh stacked-LSTM sentiment classifier.

    Architecture: Embedding(128) -> LSTM(100, return_sequences=True) ->
    Dropout -> LSTM(100) -> Dropout -> Dense(1, sigmoid).
    Compiled with binary cross-entropy, accuracy as metric, and Adam using
    the learning rate in rnn_hyper_param['LR'].

    Reads the module-level `word_index` and `rnn_hyper_param`.

    Returns:
        The compiled keras.Sequential model (untrained).
    """
    model = keras.Sequential([
        # Sequence length comes from the same hyper-parameter used for padding,
        # rather than a second hard-coded 520.
        keras.layers.Embedding(len(word_index)+1, 128, input_length=rnn_hyper_param['MAX_LENGTH']),
        keras.layers.LSTM(100, return_sequences=True),
        keras.layers.Dropout(0.5),
        keras.layers.LSTM(100),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=rnn_hyper_param['LR']),
                     loss='binary_crossentropy', metrics=['accuracy'])
    
    return model


In [14]:
def avgNestedLists(nested_vals):
    """
    Column-wise mean over a collection of rows that may differ in length.

    For every position up to the length of the longest row, the values of all
    rows that are long enough to have that position are averaged with
    np.nanmean (so NaN entries are ignored). Returns a plain list with one
    mean per position; an empty input yields an empty list.
    """
    longest = max((len(row) for row in nested_vals), default=0)
    return [
        np.nanmean([row[col] for row in nested_vals if col < len(row)])
        for col in range(longest)
    ]

# <center>Best CNN without Data Augmentation</center>

In [15]:
# Callbacks for CNN training, both monitoring val_loss. The patience values
# (4 for the learning-rate reduction, 12 for early stopping) were chosen by
# analysing the vanilla CNN's behaviour during training.

reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=4,
    min_lr=0.00005,
    verbose=1,
)
earlystop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=12,
    verbose=1,
)

In [19]:
# For each subset of data, train our CNN model on it three times.
# Store the Keras History objects in a dictionary (keyed by the subset size as
# a string) to be used later for analysis.
#
# NOTE(review): epochs is hard-coded to 250 here while cnn_hyper_param['EPOCHS']
# is 150 and unused — confirm which value is intended.
# NOTE(review): the held-out test split is passed as validation_data, so the
# val_loss-driven callbacks react to test-set performance — confirm this is intended.

CNN_noaug_hist = {}
for size, dataset in data_dict.items():
    print(f"Training using {size} of dataset.")

    # The inputs are identical for all three repetitions, so encode the labels
    # and tokenize/pad the reviews once per subset instead of once per run.
    x_train = list(dataset['review'])
    y_train = np.array([1 if label == 'positive' else 0 for label in dataset['sentiment']])

    train_sequences = tokenizer.texts_to_sequences(x_train)
    training_padded = pad_sequences(train_sequences, padding=cnn_hyper_param['PADDING_TYPE'],
                                    maxlen=cnn_hyper_param['MAX_LENGTH'], truncating=cnn_hyper_param['TRUNC_TYPE'])

    _history = []
    for i in range(3):
        # A new model per repetition, so each run starts from fresh weights.
        CNN = create_cnn()
        history = CNN.fit(training_padded, y_train, batch_size=cnn_hyper_param['BS'],
                          epochs=250, validation_data=(testing_padded, y_test),
                          callbacks=[reduce_lr, earlystop])
        _history.append(history)
    CNN_noaug_hist[f'{size}'] = _history

Training using 0.01 of dataset.
Train on 400 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250

Epoch 00015: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 00025: early stopping
Train on 400 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250

Epoch 00009: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/

Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78/250
Epoch 79/250
Epoch 80/250
Epoch 81/250
Epoch 82/250
Epoch 83/250
Epoch 84/250


Epoch 85/250
Epoch 86/250
Epoch 87/250
Epoch 88/250
Epoch 89/250
Epoch 90/250
Epoch 91/250
Epoch 92/250
Epoch 93/250
Epoch 94/250
Epoch 95/250
Epoch 96/250
Epoch 97/250
Epoch 98/250
Epoch 99/250
Epoch 100/250
Epoch 101/250
Epoch 102/250
Epoch 103/250
Epoch 104/250
Epoch 105/250
Epoch 106/250
Epoch 107/250
Epoch 108/250
Epoch 109/250
Epoch 110/250
Epoch 111/250
Epoch 112/250
Epoch 113/250
Epoch 114/250
Epoch 115/250
Epoch 116/250
Epoch 117/250
Epoch 118/250
Epoch 119/250
Epoch 120/250
Epoch 121/250
Epoch 122/250
Epoch 123/250
Epoch 124/250
Epoch 125/250
Epoch 126/250
Epoch 127/250
Epoch 128/250
Epoch 129/250
Epoch 130/250
Epoch 131/250
Epoch 132/250
Epoch 133/250
Epoch 134/250
Epoch 135/250
Epoch 136/250
Epoch 137/250
Epoch 138/250
Epoch 139/250
Epoch 140/250


Epoch 141/250
Epoch 142/250
Epoch 143/250
Epoch 144/250
Epoch 145/250
Epoch 146/250
Epoch 147/250
Epoch 148/250
Epoch 149/250
Epoch 150/250
Epoch 151/250
Epoch 152/250
Epoch 153/250
Epoch 154/250
Epoch 155/250
Epoch 156/250
Epoch 157/250
Epoch 158/250
Epoch 159/250
Epoch 160/250
Epoch 161/250
Epoch 162/250
Epoch 163/250
Epoch 164/250
Epoch 165/250
Epoch 166/250
Epoch 167/250
Epoch 168/250
Epoch 169/250
Epoch 170/250
Epoch 171/250
Epoch 172/250
Epoch 173/250
Epoch 174/250
Epoch 175/250
Epoch 176/250
Epoch 177/250
Epoch 178/250
Epoch 179/250
Epoch 180/250
Epoch 181/250
Epoch 182/250
Epoch 183/250
Epoch 184/250
Epoch 185/250
Epoch 186/250
Epoch 187/250
Epoch 188/250
Epoch 189/250
Epoch 190/250
Epoch 191/250
Epoch 192/250
Epoch 193/250
Epoch 194/250
Epoch 195/250


Epoch 196/250
Epoch 197/250
Epoch 198/250
Epoch 199/250
Epoch 200/250
Epoch 201/250
Epoch 202/250
Epoch 203/250
Epoch 204/250
Epoch 205/250
Epoch 206/250
Epoch 207/250
Epoch 208/250
Epoch 209/250
Epoch 210/250
Epoch 211/250
Epoch 212/250
Epoch 213/250
Epoch 214/250
Epoch 215/250
Epoch 216/250
Epoch 217/250
Epoch 218/250
Epoch 219/250
Epoch 220/250
Epoch 221/250
Epoch 222/250
Epoch 223/250
Epoch 224/250
Epoch 225/250
Epoch 226/250
Epoch 227/250
Epoch 228/250
Epoch 229/250
Epoch 230/250
Epoch 231/250
Epoch 232/250
Epoch 233/250
Epoch 234/250
Epoch 235/250
Epoch 236/250
Epoch 237/250
Epoch 238/250
Epoch 239/250
Epoch 240/250
Epoch 241/250
Epoch 242/250
Epoch 243/250
Epoch 244/250
Epoch 245/250
Epoch 246/250
Epoch 247/250
Epoch 248/250
Epoch 249/250
Epoch 250/250


Train on 400 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250

Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250

Epoch 00018: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 00024: early stopping
Training using 0.05 of dataset.
Train on 2000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.000200000009499490

Epoch 29/250
Epoch 00029: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 00033: early stopping
Train on 2000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 00028: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 00032: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 00036: early stopping
Train on 2000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250


Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 00029: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 00033: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 00037: early stopping
Training using 0.1 of dataset.
Train on 4000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 00022: ReduceLROnPl

Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 00026: early stopping
Train on 4000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 00022: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 00026: early stopping
Train on 4000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 19/250
E

Epoch 00022: early stopping
Train on 8000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 00016: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 00020: early stopping
Training using 0.3 of dataset.
Train on 12000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 00014: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: early stopping
Train on 12000 sampl

Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 00014: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: early stopping
Train on 16000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 00014: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: early stopping
Training using 0.5 of dataset.
Train on 20000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
E

Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 00017: early stopping
Train on 20000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 00013: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 00017: early stopping
Train on 20000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 00013: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 00017: early stopping
Training using 0.6 of dataset.
Train on 24000 samp

Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 00012: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 00016: early stopping
Train on 24000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 00012: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 00016: early stopping
Train on 24000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 00013: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 14/250
Epoch 15/250
Epoch 16/250
E

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 00012: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 00016: early stopping
Train on 28000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 00012: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 00016: early stopping
Train on 28000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 00012: ReduceLROnP

Epoch 7/250
Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 00011: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 00015: early stopping
Train on 32000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 00012: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 00016: early stopping
Train on 32000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 00012: 

Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 00012: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 00016: early stopping
Train on 36000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 00011: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 00015: early stopping
Train on 36000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 00011: R

In [20]:
# For every training-subset size, average the validation-accuracy curves of its
# three runs epoch by epoch and keep the averaged curve in new_dict under the
# same key. E.g. the three runs trained on 1% of the data give one averaged
# curve. This is done for all subsets (0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9).
# Each run's raw curve is printed first, then a separator, then the average.

new_dict = {}
for key, item in CNN_noaug_hist.items():
    val_acc_runs = []
    for run in item:
        val_acc = run.history['val_accuracy']
        print(val_acc)
        val_acc_runs.append(val_acc)
    print("========================================================")
    # Runs can stop at different epochs; avgNestedLists handles unequal lengths.
    avg_list = avgNestedLists(val_acc_runs)
    new_dict[key] = avg_list
    print(avg_list)
    print()

[0.4996, 0.4993, 0.4986, 0.4981, 0.4976, 0.4984, 0.5011, 0.5038, 0.5028, 0.501, 0.5053, 0.5053, 0.5053, 0.5045, 0.5039, 0.5033, 0.5025, 0.5024, 0.5023, 0.5022, 0.5021, 0.5024, 0.5028, 0.5029, 0.5029]
[0.5072, 0.5034, 0.5043, 0.5133, 0.5164, 0.518, 0.5178, 0.5196, 0.5204, 0.5196, 0.5191, 0.5177, 0.5177, 0.5178, 0.518, 0.5181, 0.5185, 0.5187, 0.5189, 0.5192, 0.5189, 0.5191, 0.5189, 0.5183, 0.5186, 0.5189, 0.5185, 0.5182, 0.5182, 0.5175, 0.5178, 0.5177, 0.5172, 0.5182, 0.5184, 0.5182, 0.5187, 0.5183, 0.5186, 0.5186, 0.5198, 0.5201, 0.5197, 0.5195, 0.5189, 0.5182, 0.5175, 0.5177, 0.5178, 0.5176, 0.5176, 0.5178, 0.5176, 0.5173, 0.5172, 0.5162, 0.5153, 0.5149, 0.5152, 0.5146, 0.5129, 0.5128, 0.5126, 0.5128, 0.512, 0.5116, 0.5107, 0.5109, 0.511, 0.5107, 0.5109, 0.5118, 0.5114, 0.5122, 0.5124, 0.5127, 0.5136, 0.5131, 0.5126, 0.5135, 0.5126, 0.5126, 0.5131, 0.5133, 0.514, 0.5151, 0.5153, 0.5165, 0.5174, 0.5172, 0.5163, 0.5159, 0.5162, 0.5155, 0.5155, 0.5161, 0.5161, 0.5159, 0.5156, 0.5154, 0.51

In [25]:
# Write the results into csv files with the name being the model name and the subset size

for title, item in new_dict.items():
    # One single-column DataFrame per subset: the column is named after the
    # subset size and holds the averaged validation-accuracy curve.
    hist_df = pd.DataFrame({title: item})

    # save to csv: hand the path to pandas instead of a handle from open().
    # A text handle opened without newline='' makes to_csv emit doubled line
    # endings on Windows; letting pandas open the file avoids that.
    hist_csv_file = 'CNN_NoAug_History_' + str(title) + '.csv'
    hist_df.to_csv(hist_csv_file)

# <center>Best RNN without Data Augmentation</center>

In [59]:
# Callbacks for RNN training, both monitoring val_loss. The patience values
# (8 for the learning-rate reduction, 16 for early stopping) were determined
# in the vanilla RNN code. These rebind the same names used for the CNN callbacks.

reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=8,
    min_lr=0.00005,
    verbose=1,
)
earlystop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=16,
    verbose=1,
)

In [60]:
# For each subset of data, train our RNN model on it three times.
# Store the Keras History objects in a dictionary (keyed by the subset size as
# a string) to be used later for analysis.
#
# NOTE(review): epochs is hard-coded to 250 here while rnn_hyper_param['EPOCHS']
# is 150 and unused — confirm which value is intended.
# NOTE(review): the held-out test split is passed as validation_data, so the
# val_loss-driven callbacks react to test-set performance — confirm this is intended.

RNN_noaug_hist = {}
for size, dataset in data_dict.items():
    print(f"Training using {size} of dataset.")

    # The inputs are identical for all three repetitions, so encode the labels
    # and tokenize/pad the reviews once per subset instead of once per run.
    x_train = list(dataset['review'])
    y_train = np.array([1 if label == 'positive' else 0 for label in dataset['sentiment']])

    train_sequences = tokenizer.texts_to_sequences(x_train)
    training_padded = pad_sequences(train_sequences, padding=rnn_hyper_param['PADDING_TYPE'],
                                    maxlen=rnn_hyper_param['MAX_LENGTH'], truncating=rnn_hyper_param['TRUNC_TYPE'])

    _history = []
    for i in range(3):
        # A new model per repetition, so each run starts from fresh weights.
        RNN = create_rnn()
        history = RNN.fit(training_padded, y_train, batch_size=rnn_hyper_param['BS'],
                          epochs=250, validation_data=(testing_padded, y_test),
                          callbacks=[reduce_lr, earlystop])
        _history.append(history)
    RNN_noaug_hist[f'{size}'] = _history

Training using 0.01 of dataset.
Train on 400 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250

Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 00024: early stopping
Train on 400 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch

Epoch 00029: early stopping
Train on 400 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250

Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250

Epoch 00022: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00022: early stopping
Training using 0.05 of dataset.
Train on 2000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 00020: ReduceLROnPlateau reducing learn

Epoch 10/250
Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 19/250
Epoch 00019: early stopping
Train on 2000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00019: early stopping
Training using 0.1 of dataset.
Train on 4000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 

Epoch 00019: early stopping
Train on 4000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00018: early stopping
Train on 4000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00017: early stopping
Training using 0.2 of dataset.
Train on 8000 samples, validat

Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00018: early stopping
Train on 12000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00017: early stopping
Train on 12000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0009

Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 00037: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 00045: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 00045: early stopping
Train on 16000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 00010: 

Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 00024: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 00032: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 00032: early stopping
Train on 20000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00018: early stopping
Train on 20000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 00009: ReduceLROnPl

Epoch 17/250
Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00017: early stopping
Train on 24000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 18/250
Epoch 00018: early stopping
Training using 0.7 of dataset.
Train on 28000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 

Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 00033: ReduceLROnPlateau reducing learning rate to 5e-05.
Epoch 00033: early stopping
Training using 0.8 of dataset.
Train on 32000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00026

Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 00029: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00029: early stopping
Training using 0.9 of dataset.
Train on 36000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 00021: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 00029: ReduceLROnPlateau reducing learning rate to 0.0001999999862164259.
Epoch 00029: early stopping
Train on 36000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 

In [91]:
# For each training-subset key in RNN_noaug_hist, collect the validation-accuracy
# curve of every recorded run, average those curves with avgNestedLists, and keep
# the averaged curve in new_dict under the same key.
# (Subsets used: 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)

new_dict = {}
for subset_key, subset_runs in RNN_noaug_hist.items():
    run_curves = []
    for run in subset_runs:
        # Echo each run's raw curve so it can be compared with the average below.
        val_curve = run.history['val_accuracy']
        print(val_curve)
        run_curves.append(val_curve)
    print("========================================================")
    # NOTE(review): runs stop at different epochs, so the curves differ in length;
    # how uneven lengths are combined is decided inside avgNestedLists.
    mean_curve = avgNestedLists(run_curves)
    new_dict[subset_key] = mean_curve
    print(mean_curve)
    print()

[0.4999, 0.5, 0.5, 0.5031, 0.5041, 0.5, 0.5, 0.505, 0.5043, 0.5048, 0.5, 0.5002, 0.5053, 0.5052, 0.5056, 0.5055, 0.5051, 0.5005, 0.5002, 0.5003, 0.5002, 0.5004, 0.5003, 0.5003]
[0.5, 0.5, 0.5, 0.5, 0.5054, 0.5023, 0.5008, 0.5032, 0.4985, 0.4984, 0.4989, 0.498, 0.5037, 0.5038, 0.5036, 0.5039, 0.5038, 0.5038, 0.5038, 0.5037, 0.5036, 0.5037, 0.5038, 0.5037, 0.5039, 0.5039, 0.5037, 0.5035, 0.5036]
[0.5, 0.4999, 0.5, 0.5, 0.5, 0.5043, 0.5021, 0.501, 0.5023, 0.5033, 0.4987, 0.4994, 0.5026, 0.4978, 0.4983, 0.4989, 0.5044, 0.5038, 0.4993, 0.5037, 0.5028, 0.4978]
[0.49996665, 0.49996665, 0.5, 0.5010333, 0.5031667, 0.5022, 0.50096667, 0.5030666, 0.5017, 0.5021667, 0.4992, 0.4992, 0.5038667, 0.5022667, 0.5025, 0.50276667, 0.50443333, 0.50270003, 0.5011, 0.50256664, 0.50219995, 0.50063336, 0.50205, 0.502, 0.5039, 0.5039, 0.5037, 0.5035, 0.5036]

[0.5, 0.5031, 0.5, 0.5052, 0.5026, 0.5014, 0.4992, 0.5009, 0.4948, 0.5016, 0.4965, 0.4959, 0.4992, 0.5011, 0.5006, 0.4979, 0.4943, 0.4971, 0.4996, 0.4974]

In [96]:
# Write the averaged results to CSV files, one file per subset; the file name is
# the model name followed by the subset key.

for title, item in new_dict.items():
    # Build a one-column DataFrame: the column header is the subset key and the
    # rows are the entries of the averaged validation-accuracy list.
    hist_df = pd.DataFrame({title: item})

    # Save to csv. The path is given directly to pandas so that it opens the file
    # itself with the correct newline handling; writing through a handle created
    # by a plain open(..., mode='w') (no newline='') produces an extra blank line
    # after every row on Windows.
    hist_csv_file = 'RNN_NoAug_History_' + str(title) + '.csv'
    hist_df.to_csv(hist_csv_file)