# Sarcasm Detector using TensorFlow

### Importing Datasets

In [None]:
with open('Sarcasm_Headlines_Dataset_v2.json', 'r') as f:
    totaldata = f.readlines()

sentences = []
labels = []
urls = []

for item in totaldata:
    for news in eval(item):
        news = eval(item)
        sentences.append(news['headline'])
        labels.append(news['is_sarcastic'])
        urls.append(news['article_link'])


### Defining some runtime variables

In [4]:
vocab_size = 10000
embedding_dim = 16
max_length = 100
trunc_type='post'
padding_type='post'
oov_tok = "<OOV>"
training_size = 20000

### Importing required Modules

In [5]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
from datetime import datetime

### Splitting Testing and Training set

In [6]:
training_sentences = sentences[0:10000]
testing_sentences = sentences[10000:]
training_labels = labels[0:10000]
testing_labels = labels[10000:]

### Tokenizing and Padding datasets

In [7]:
tokenizer = Tokenizer(num_words=vocab_size,oov_token='oov')
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

training_sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(training_sequences, padding='post', maxlen=max_length,truncating=trunc_type)
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, padding='post', maxlen=max_length,truncating=trunc_type)

2022-03-16 15:32:59.077226
2022-03-16 15:32:59.766261


### Defining neural network

In [12]:
t1 = datetime.now()

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer = 'adam',metrics=['accuracy'])
t2 = datetime.now()
print(t2-t1)

2022-03-16 15:33:50.131527
2022-03-16 15:33:50.259525


### Converting datalists as arrays

In [14]:
import numpy as np
training_padded = np.array(padded)
training_labels = np.array(training_labels)
testing_padded = np.array(testing_padded)
testing_labels = np.array(testing_labels)

### Training the neural network

In [18]:
t1 = (datetime.now())
num_epochs = 50
history = model.fit(padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=2)
t2 = (datetime.now())
print(t2-t1)

Epoch 1/50
313/313 - 7s - loss: 1.9417e-09 - accuracy: 1.0000 - val_loss: 1.8759 - val_accuracy: 0.7845 - 7s/epoch - 23ms/step
Epoch 2/50
313/313 - 6s - loss: 1.9781e-09 - accuracy: 1.0000 - val_loss: 1.8631 - val_accuracy: 0.7864 - 6s/epoch - 21ms/step
Epoch 3/50
313/313 - 6s - loss: 1.8993e-09 - accuracy: 1.0000 - val_loss: 1.8658 - val_accuracy: 0.7862 - 6s/epoch - 21ms/step
Epoch 4/50
313/313 - 11s - loss: 1.9640e-09 - accuracy: 1.0000 - val_loss: 1.8649 - val_accuracy: 0.7863 - 11s/epoch - 37ms/step
Epoch 5/50
313/313 - 7s - loss: 1.8857e-09 - accuracy: 1.0000 - val_loss: 1.8834 - val_accuracy: 0.7844 - 7s/epoch - 22ms/step
Epoch 6/50
313/313 - 7s - loss: 1.9301e-09 - accuracy: 1.0000 - val_loss: 1.8627 - val_accuracy: 0.7871 - 7s/epoch - 21ms/step
Epoch 7/50
313/313 - 6s - loss: 1.8212e-09 - accuracy: 1.0000 - val_loss: 1.8650 - val_accuracy: 0.7868 - 6s/epoch - 21ms/step
Epoch 8/50
313/313 - 11s - loss: 1.8733e-09 - accuracy: 1.0000 - val_loss: 1.8796 - val_accuracy: 0.7847 - 11

### Testing the neural network

In [None]:
sentence = ["Elephant jumped out of the window", "The weather today is bright and sunny"]
sequences = tokenizer.texts_to_sequences(sentence)

In [None]:
padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
print(model.predict(padded))