# Importing Necessary Libraries 

In [20]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from tensorflow.keras import Model
from tensorflow.keras.layers import Input,Dense,Dropout
from tensorflow.keras.callbacks import ModelCheckpoint
import pandas as pd
import numpy as np
import re

# Loading the Sarcasm Dataset
## Datset Link:- https://www.kaggle.com/datasets/rmisra/news-headlines-dataset-for-sarcasm-detection

In [2]:
data_1 = pd.read_json("Sarcasm_Headlines_Dataset.json", lines=True)
data_2 = pd.read_json("Sarcasm_Headlines_Dataset_v2.json", lines=True)
data =  pd.concat([data_1, data_2])
data.head()

Unnamed: 0,article_link,headline,is_sarcastic
0,https://www.huffingtonpost.com/entry/versace-b...,former versace store clerk sues over secret 'b...,0
1,https://www.huffingtonpost.com/entry/roseanne-...,the 'roseanne' revival catches up to our thorn...,0
2,https://local.theonion.com/mom-starting-to-fea...,mom starting to fear son's web series closest ...,1
3,https://politics.theonion.com/boehner-just-wan...,"boehner just wants wife to listen, not come up...",1
4,https://www.huffingtonpost.com/entry/jk-rowlin...,j.k. rowling wishes snape happy birthday in th...,0


# Train Test Split the Data

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data['headline'],data['is_sarcastic'], stratify=data['is_sarcastic'])

# Loading BERT Preprocess & Base Model from Tensorflow

In [4]:
preprocess_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'
encoder_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4'

In [5]:
bert_preprocess_model = hub.KerasLayer(preprocess_url)

In [6]:
text_test = ['Very bad movie','I love Python']
text_preprocessed = bert_preprocess_model(text_test)
text_preprocessed.keys()

dict_keys(['input_word_ids', 'input_type_ids', 'input_mask'])

In [7]:
bert_model = hub.KerasLayer(encoder_url)

In [8]:
bert_results = bert_model(text_preprocessed)
bert_results.keys()

dict_keys(['encoder_outputs', 'default', 'sequence_output', 'pooled_output'])

## BERT(base) Encodes any sentence into an array of 768 length

In [11]:
bert_results['pooled_output'].shape 

TensorShape([2, 768])

## A look at Train Data

In [12]:
X_train.head(4)

26170    retirement overseas: are we all just waiting f...
22909    moron stepfather takes care of child who doesn...
2040                 parking-ramp attendant moves slightly
25043    federal judge pencils blocking trump's unconst...
Name: headline, dtype: object

# Craeting a function for Embedding the Sentence & testing it

In [14]:
def get_sentence_embedding(sentences):
    text_preprocessed = bert_preprocess_model(sentences)
    embedding = bert_model(text_preprocessed)
    return embedding['pooled_output']
        
        
get_sentence_embedding([
    "500$ discount. hurry up", 
    "Bhavin, are you up for a volleybal game tomorrow?"]
)

<tf.Tensor: shape=(2, 768), dtype=float32, numpy=
array([[-0.8435168 , -0.5132728 , -0.8884576 , ..., -0.747489  ,
        -0.7531474 ,  0.91964483],
       [-0.87208354, -0.50543964, -0.94446677, ..., -0.858475  ,
        -0.71745336,  0.88082975]], dtype=float32)>

# Creating the Main Neural Network 

In [16]:
#Bert Layers
text_input = Input(shape=(),dtype=tf.string,name='text')
preprocessed_text = bert_preprocess_model(text_input) 
outputs = bert_model(preprocessed_text)

#Neural Network Layers
l = Dropout(0.1)(outputs['pooled_output'])
l = Dense(100,activation='relu')(l)
l = Dense(1,activation='sigmoid')(l)

model = Model(inputs=[text_input],outputs=[l])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               [(None,)]            0                                            
__________________________________________________________________________________________________
keras_layer (KerasLayer)        {'input_word_ids': ( 0           text[0][0]                       
__________________________________________________________________________________________________
keras_layer_1 (KerasLayer)      {'encoder_outputs':  109482241   keras_layer[0][0]                
                                                                 keras_layer[0][1]                
                                                                 keras_layer[0][2]                
______________________________________________________________________________________________

In [17]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [28]:
checkpoint = ModelCheckpoint(filepath="Sarcasm+BERT.h5", 
                             monitor='loss',
                             verbose=1, 
                             save_best_only=True,
                             mode='min')

In [29]:
X_test.shape

(13832,)

# Fitting the Model with Training Data

In [30]:
model.fit(X_train, y_train, epochs=5,callbacks=[checkpoint])

Epoch 1/5

Epoch 00001: loss improved from inf to 0.44768, saving model to Sarcasm+BERT.h5
Epoch 2/5

Epoch 00002: loss improved from 0.44768 to 0.43669, saving model to Sarcasm+BERT.h5
Epoch 3/5

Epoch 00003: loss improved from 0.43669 to 0.43529, saving model to Sarcasm+BERT.h5
Epoch 4/5

Epoch 00004: loss improved from 0.43529 to 0.42484, saving model to Sarcasm+BERT.h5
Epoch 5/5

Epoch 00005: loss improved from 0.42484 to 0.42243, saving model to Sarcasm+BERT.h5


<keras.callbacks.History at 0x7fa4281b9898>

In [34]:
model.evaluate(X_test,y_test)



[0.4077775478363037, 0.802703857421875]

# Now making some predictions

In [55]:
def predict_sarcasm(sentence):
    pred = model.predict([sentence])
    pred = pred[0][0]*100
    if pred>=50: return "It's a sarcasm!" 
    else: return "It's not a sarcasm."

In [74]:
sentence = "I was depressed. He asked me to be happy. I am not depressed anymore."
predict_sarcasm(sentence)

"It's not a sarcasm."

In [76]:
sentence = "You just broke my car window. Great job."
predict_sarcasm(sentence)

"It's not a sarcasm."

In [77]:
sentence = "You just saved my dog's life. Thanks a million."
predict_sarcasm(sentence)

"It's not a sarcasm."