In [2]:
from google.colab import drive
# Attach Google Drive to the Colab filesystem at /content/drive.
drive.mount(mountpoint='/content/drive')


Mounted at /content/drive


In [4]:
import pandas as pd
import re
# Disable column-width truncation so full links and headlines are displayed.
pd.set_option('display.max_colwidth', None)

# The file is read in JSON-lines mode (one record per line).
sar_acc = pd.read_json(
    '/content/drive/MyDrive/Sarcasm_Headlines_Dataset.json',
    lines=True,
)

# 'source' is the third run of word characters in each link; for links shaped
# like "https://www.huffingtonpost.com/..." that is the publisher name.
sar_acc['source'] = [
    re.findall(r'\w+', link)[2]
    for link in sar_acc['article_link']
]
sar_acc.head()

Unnamed: 0,article_link,headline,is_sarcastic,source
0,https://www.huffingtonpost.com/entry/versace-black-code_us_5861fbefe4b0de3a08f600d5,former versace store clerk sues over secret 'black code' for minority shoppers,0,huffingtonpost
1,https://www.huffingtonpost.com/entry/roseanne-revival-review_us_5ab3a497e4b054d118e04365,"the 'roseanne' revival catches up to our thorny political mood, for better and worse",0,huffingtonpost
2,https://local.theonion.com/mom-starting-to-fear-son-s-web-series-closest-thing-she-1819576697,mom starting to fear son's web series closest thing she will have to grandchild,1,theonion
3,https://politics.theonion.com/boehner-just-wants-wife-to-listen-not-come-up-with-alt-1819574302,"boehner just wants wife to listen, not come up with alternative debt-reduction ideas",1,theonion
4,https://www.huffingtonpost.com/entry/jk-rowling-wishes-snape-happy-birthday_us_569117c4e4b0cad15e64fdcb,j.k. rowling wishes snape happy birthday in the most magical way,0,huffingtonpost


In [6]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# Fixed seed so the train/test partition is identical on every
# Restart & Run All.
SPLIT_SEED = 42

# Features are the raw headline strings; targets are the sarcasm flags.
X = sar_acc.headline
Y = sar_acc.is_sarcastic

# Encode the labels as integers and reshape them into a single column.
le = LabelEncoder()
Y = le.fit_transform(Y)
Y = Y.reshape(-1, 1)

# Hold out 20% for testing. Stratifying on the (flattened) label keeps the
# class ratio the same in both splits.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=Y.ravel(),
)

In [8]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Vocabulary cap for the tokenizer and the fixed length every sequence is
# padded/truncated to; both are reused when the model is built.
max_words, max_len = 1000, 150

# Build the word index from the training headlines only.
tok = Tokenizer(num_words=max_words)
tok.fit_on_texts(X_train)

# Headlines -> lists of integer word ids -> fixed-width integer matrix.
sequences = tok.texts_to_sequences(X_train)
sequences_matrix = pad_sequences(
    sequences,
    maxlen=max_len,
)


In [9]:
from keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding
from keras.optimizers import RMSprop
from keras.models import Model
def Sarcasm_NN():
    """Build the (uncompiled) binary sarcasm classifier.

    Architecture: integer token ids -> 50-d Embedding -> LSTM(64) ->
    Dense(256) + ReLU -> Dropout(0.2) -> Dense(1) + sigmoid.

    Reads the module-level ``max_len`` (input sequence length) and
    ``max_words`` (embedding vocabulary size) at call time.

    Returns:
        A Keras ``Model`` taking ``(batch, max_len)`` inputs and producing
        one sigmoid output per sample.
    """
    inputs = Input(name='inputs', shape=(max_len,))
    # `input_length` is deliberately not passed: the sequence length is
    # already fixed by `inputs`, and Keras 3 deprecates the argument.
    layer = Embedding(max_words, 50)(inputs)
    layer = LSTM(64)(layer)
    layer = Dense(256, name='FC1')(layer)
    layer = Activation('relu')(layer)
    layer = Dropout(0.2)(layer)
    layer = Dense(1, name='out_layer')(layer)
    layer = Activation('sigmoid')(layer)
    model = Model(inputs=inputs, outputs=layer)
    return model

In [10]:
# Instantiate the network and print its layer-by-layer summary.
model = Sarcasm_NN()
model.summary()

# Binary cross-entropy loss with the RMSprop optimizer; accuracy is tracked
# as the reported metric.
model.compile(
    optimizer=RMSprop(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)



In [11]:
from keras.callbacks import EarlyStopping
# Stop once val_loss fails to improve by at least min_delta. With the default
# patience of 0 that happens on the first non-improving epoch, so
# restore_best_weights=True rolls the model back to the best epoch instead of
# keeping the weights from the epoch that triggered the stop.
early_stop = EarlyStopping(
    monitor='val_loss',
    min_delta=0.0001,
    restore_best_weights=True,
)

# Train with 10% of the training matrix held out for validation. The History
# object is kept so the per-epoch metrics can be inspected afterwards.
history = model.fit(
    sequences_matrix,
    Y_train,
    batch_size=100,
    epochs=5,
    validation_split=0.1,
    callbacks=[early_stop],
)

Epoch 1/5
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 213ms/step - accuracy: 0.6345 - loss: 0.6182 - val_accuracy: 0.7852 - val_loss: 0.4463
Epoch 2/5
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 219ms/step - accuracy: 0.8239 - loss: 0.3940 - val_accuracy: 0.8189 - val_loss: 0.3947
Epoch 3/5
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 221ms/step - accuracy: 0.8372 - loss: 0.3585 - val_accuracy: 0.8189 - val_loss: 0.4011


<keras.src.callbacks.history.History at 0x7a67e5167700>

In [13]:
from tensorflow.keras.preprocessing.sequence import pad_sequences # Make sure to import
# Encode the held-out headlines with the tokenizer fitted on the training
# split, then pad them to the same max_len used for the training matrix.
test_sequences = tok.texts_to_sequences(X_test)
test_sequences_matrix = pad_sequences(
    test_sequences,
    maxlen=max_len,
)

In [14]:
# Score the trained model on the held-out split; the result holds the loss
# followed by the compiled metric (accuracy).
accr = model.evaluate(x=test_sequences_matrix, y=Y_test)

[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 21ms/step - accuracy: 0.8078 - loss: 0.4013
