In [1]:
#Using Four different Neural Networks, to avoid the overfitting and to find the Highly Accurate one
import tensorflow as tf
print(tf.__version__)

2.4.1


In [2]:
import json
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \
    -O /tmp/sarcasm.json

--2021-03-04 09:41:47--  https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json
Resolving storage.googleapis.com (storage.googleapis.com)... 2404:6800:4007:812::2010, 2404:6800:4007:810::2010, 2404:6800:4007:80c::2010, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|2404:6800:4007:812::2010|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5643545 (5.4M) [application/json]
Saving to: ‘/tmp/sarcasm.json’


2021-03-04 09:42:19 (178 KB/s) - ‘/tmp/sarcasm.json’ saved [5643545/5643545]



In [5]:
with open("/tmp/sarcasm.json", 'r') as f:
    datastore = json.load(f)

sentences = []
labels = []

for item in datastore:
    sentences.append(item['headline'])
    labels.append(item['is_sarcastic'])

In [6]:
training_size = 20000
training_sentences = sentences[0:training_size]
testing_sentences = sentences[training_size:]
training_labels = labels[0:training_size]
testing_labels = labels[training_size:]

In [7]:
vocab_size = 10000
embedding_dim = 16
max_length = 100
trunc_type='post'
padding_type='post'
oov_tok = "<OOV>"

In [8]:
#Word Indexing
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)

tokenizer.fit_on_texts(training_sentences)

word_index = tokenizer.word_index


In [9]:
#Sequence Conversion for Training set
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

#Sequence Conversion for Training set
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

In [10]:
import numpy as np
training_padded = np.array(training_padded)
training_labels = np.array(training_labels)
testing_padded = np.array(testing_padded)
testing_labels = np.array(testing_labels)

In [12]:
#Simple RNN
model1 = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model1.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [13]:
history = model1.fit(training_padded, 
                    training_labels, 
                    epochs=30, 
                    validation_data=(testing_padded, testing_labels), 
                    verbose=2)

Epoch 1/30
625/625 - 4s - loss: 0.6604 - accuracy: 0.5949 - val_loss: 0.5806 - val_accuracy: 0.6584
Epoch 2/30
625/625 - 2s - loss: 0.4374 - accuracy: 0.8286 - val_loss: 0.3899 - val_accuracy: 0.8369
Epoch 3/30
625/625 - 2s - loss: 0.3167 - accuracy: 0.8754 - val_loss: 0.3550 - val_accuracy: 0.8509
Epoch 4/30
625/625 - 2s - loss: 0.2632 - accuracy: 0.8982 - val_loss: 0.3469 - val_accuracy: 0.8539
Epoch 5/30
625/625 - 2s - loss: 0.2299 - accuracy: 0.9101 - val_loss: 0.3417 - val_accuracy: 0.8554
Epoch 6/30
625/625 - 2s - loss: 0.2021 - accuracy: 0.9240 - val_loss: 0.3467 - val_accuracy: 0.8544
Epoch 7/30
625/625 - 2s - loss: 0.1785 - accuracy: 0.9311 - val_loss: 0.3624 - val_accuracy: 0.8515
Epoch 8/30
625/625 - 2s - loss: 0.1599 - accuracy: 0.9408 - val_loss: 0.3798 - val_accuracy: 0.8481
Epoch 9/30
625/625 - 2s - loss: 0.1460 - accuracy: 0.9468 - val_loss: 0.3857 - val_accuracy: 0.8523
Epoch 10/30
625/625 - 2s - loss: 0.1297 - accuracy: 0.9546 - val_loss: 0.4307 - val_accuracy: 0.8368

In [14]:
#Using LSTM Layer 
model2 = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model2.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [15]:
history1 = model2.fit(training_padded, 
                    training_labels, 
                    epochs=30, 
                    validation_data=(testing_padded, testing_labels), 
                    verbose=2)

Epoch 1/30
625/625 - 28s - loss: 0.4220 - accuracy: 0.7940 - val_loss: 0.3200 - val_accuracy: 0.8617
Epoch 2/30
625/625 - 25s - loss: 0.2078 - accuracy: 0.9160 - val_loss: 0.3378 - val_accuracy: 0.8566
Epoch 3/30
625/625 - 25s - loss: 0.1340 - accuracy: 0.9498 - val_loss: 0.3984 - val_accuracy: 0.8545
Epoch 4/30
625/625 - 27s - loss: 0.0745 - accuracy: 0.9748 - val_loss: 0.5073 - val_accuracy: 0.8450
Epoch 5/30
625/625 - 30s - loss: 0.0449 - accuracy: 0.9858 - val_loss: 0.6374 - val_accuracy: 0.8414
Epoch 6/30
625/625 - 26s - loss: 0.0292 - accuracy: 0.9913 - val_loss: 0.6921 - val_accuracy: 0.8368
Epoch 7/30
625/625 - 24s - loss: 0.0218 - accuracy: 0.9935 - val_loss: 0.7856 - val_accuracy: 0.8371
Epoch 8/30
625/625 - 25s - loss: 0.0148 - accuracy: 0.9955 - val_loss: 0.8624 - val_accuracy: 0.8316
Epoch 9/30
625/625 - 24s - loss: 0.0110 - accuracy: 0.9963 - val_loss: 1.0359 - val_accuracy: 0.8311
Epoch 10/30
625/625 - 26s - loss: 0.0082 - accuracy: 0.9975 - val_loss: 0.9834 - val_accura

In [16]:
#Using Convolution Layer
model3 = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Conv1D(128, 5, activation='relu'),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model3.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [17]:
history2 = model3.fit(training_padded, 
                    training_labels, 
                    epochs=30, 
                    validation_data=(testing_padded, testing_labels), 
                    verbose=2)

Epoch 1/30
625/625 - 6s - loss: 0.4403 - accuracy: 0.7793 - val_loss: 0.3520 - val_accuracy: 0.8456
Epoch 2/30
625/625 - 5s - loss: 0.2276 - accuracy: 0.9091 - val_loss: 0.3603 - val_accuracy: 0.8498
Epoch 3/30
625/625 - 5s - loss: 0.1153 - accuracy: 0.9607 - val_loss: 0.4485 - val_accuracy: 0.8420
Epoch 4/30
625/625 - 5s - loss: 0.0461 - accuracy: 0.9868 - val_loss: 0.5789 - val_accuracy: 0.8398
Epoch 5/30
625/625 - 5s - loss: 0.0154 - accuracy: 0.9962 - val_loss: 0.7227 - val_accuracy: 0.8372
Epoch 6/30
625/625 - 5s - loss: 0.0055 - accuracy: 0.9985 - val_loss: 0.8282 - val_accuracy: 0.8335
Epoch 7/30
625/625 - 5s - loss: 0.0024 - accuracy: 0.9992 - val_loss: 0.9404 - val_accuracy: 0.8328
Epoch 8/30
625/625 - 5s - loss: 0.0019 - accuracy: 0.9995 - val_loss: 0.9920 - val_accuracy: 0.8378
Epoch 9/30
625/625 - 5s - loss: 0.0015 - accuracy: 0.9995 - val_loss: 1.0400 - val_accuracy: 0.8368
Epoch 10/30
625/625 - 5s - loss: 8.5852e-04 - accuracy: 0.9997 - val_loss: 1.1117 - val_accuracy: 0.

In [18]:
#Using Gated Recurrent Unit Layer
model4 = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model4.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [19]:
history3 = model4.fit(training_padded, 
                    training_labels, 
                    epochs=30, 
                    validation_data=(testing_padded, testing_labels), 
                    verbose=2)

Epoch 1/30
625/625 - 26s - loss: 0.4091 - accuracy: 0.8001 - val_loss: 0.3320 - val_accuracy: 0.8569
Epoch 2/30
625/625 - 21s - loss: 0.2190 - accuracy: 0.9111 - val_loss: 0.3470 - val_accuracy: 0.8501
Epoch 3/30
625/625 - 21s - loss: 0.1468 - accuracy: 0.9441 - val_loss: 0.4092 - val_accuracy: 0.8447
Epoch 4/30
625/625 - 23s - loss: 0.0949 - accuracy: 0.9671 - val_loss: 0.4864 - val_accuracy: 0.8472
Epoch 5/30
625/625 - 25s - loss: 0.0638 - accuracy: 0.9787 - val_loss: 0.5166 - val_accuracy: 0.8423
Epoch 6/30
625/625 - 26s - loss: 0.0357 - accuracy: 0.9888 - val_loss: 0.7147 - val_accuracy: 0.8411
Epoch 7/30
625/625 - 23s - loss: 0.0251 - accuracy: 0.9920 - val_loss: 0.7772 - val_accuracy: 0.8401
Epoch 8/30
625/625 - 28s - loss: 0.0177 - accuracy: 0.9945 - val_loss: 0.9175 - val_accuracy: 0.8344
Epoch 9/30
625/625 - 26s - loss: 0.0116 - accuracy: 0.9967 - val_loss: 0.9597 - val_accuracy: 0.8360
Epoch 10/30
625/625 - 28s - loss: 0.0098 - accuracy: 0.9966 - val_loss: 1.0273 - val_accura

# Results

| Model | Epochs | Training Accuracy | Validation |
|---|---|---|---|
|Simple RNN|30|99.34%|81.64%|
|LSTM|30|99.96%|81.83%|
|Convolution|30|99.94%|82.17%|
|GRU|30|99.95%|83.23%|