<a href="https://colab.research.google.com/github/SravaniNadiu/Projectfile/blob/main/M1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Read the headlines dataset (the file holds one JSON record per line)
df = pd.read_json('Sarcasm_Headlines_Dataset.json', lines=True)

# Inputs are the headline strings, targets are the is_sarcastic flags
sentences, labels = df['headline'], df['is_sarcastic']

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    sentences,
    labels,
    test_size=0.2,
    random_state=42,
)

# Padding settings, also reused later when encoding new text for prediction
max_length = 100
padding_type = 'post'
trunc_type = 'post'
pad_options = dict(maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Fit the vocabulary on the training headlines only, so the test split
# does not influence the word index
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Encode each split as integer sequences, then pad/truncate to max_length
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_train_padded = pad_sequences(X_train_sequences, **pad_options)

X_test_sequences = tokenizer.texts_to_sequences(X_test)
X_test_padded = pad_sequences(X_test_sequences, **pad_options)

# GloVe Embeddings and LSTM Sarcasm Classifier

In [None]:
import numpy as np

# Parse the GloVe text file into a {token: vector} lookup.
# Each line is split on whitespace: first field is the token, the rest are its coefficients.
embeddings_index = {}
with open('glove.6B.100d.txt', encoding='utf-8') as glove_file:
    for raw_line in glove_file:
        fields = raw_line.split()
        embeddings_index[fields[0]] = np.asarray(fields[1:], dtype='float32')

# Build the matrix consumed by the Embedding layer: row i holds the GloVe
# vector of the word whose tokenizer index is i.
embedding_dim = 100
word_index = tokenizer.word_index
vocabulary_rows = len(word_index) + 1  # one extra row so index len(word_index) is addressable
embedding_matrix = np.zeros((vocabulary_rows, embedding_dim))

for token, row in word_index.items():
    glove_vector = embeddings_index.get(token)
    if glove_vector is None:
        # Words without a GloVe entry keep their all-zero row
        continue
    embedding_matrix[row] = glove_vector

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Define the model: frozen GloVe embeddings -> two stacked LSTMs -> dense classifier head
model = Sequential([
    # mask_zero=True marks index 0 (the value pad_sequences fills with) as padding,
    # so the downstream LSTM layers skip those timesteps. The inputs are post-padded
    # to max_length, which means that without masking the recurrent layers process a
    # long run of padding *after* the real tokens before emitting their final state.
    # The recorded run of the unmasked model sat at the same ~0.56 accuracy on both
    # splits for every epoch, which is the symptom this setting is meant to address.
    Embedding(
        input_dim=len(word_index) + 1,
        output_dim=embedding_dim,
        weights=[embedding_matrix],
        mask_zero=True,
        trainable=False,  # keep the pre-trained vectors fixed during training
    ),
    LSTM(64, return_sequences=True),  # full sequence out, so the next LSTM can consume it
    Dropout(0.2),
    LSTM(32),
    Dense(24, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  # single probability output for the binary label
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print the model summary
model.summary()

In [None]:
# Fit for a fixed number of epochs, scoring the held-out split after each one.
# verbose=2 logs one summary line per epoch.
num_epochs = 10
history = model.fit(
    x=X_train_padded,
    y=y_train,
    epochs=num_epochs,
    validation_data=(X_test_padded, y_test),
    verbose=2,
)

Epoch 1/10
668/668 - 64s - 96ms/step - accuracy: 0.5606 - loss: 0.6865 - val_accuracy: 0.5608 - val_loss: 0.6857
Epoch 2/10
668/668 - 80s - 120ms/step - accuracy: 0.5611 - loss: 0.6861 - val_accuracy: 0.5608 - val_loss: 0.6858
Epoch 3/10
668/668 - 60s - 89ms/step - accuracy: 0.5611 - loss: 0.6861 - val_accuracy: 0.5608 - val_loss: 0.6859
Epoch 4/10
668/668 - 79s - 119ms/step - accuracy: 0.5611 - loss: 0.6859 - val_accuracy: 0.5608 - val_loss: 0.6858
Epoch 5/10
668/668 - 82s - 122ms/step - accuracy: 0.5611 - loss: 0.6858 - val_accuracy: 0.5608 - val_loss: 0.6857
Epoch 6/10
668/668 - 58s - 87ms/step - accuracy: 0.5611 - loss: 0.6858 - val_accuracy: 0.5608 - val_loss: 0.6859
Epoch 7/10
668/668 - 80s - 120ms/step - accuracy: 0.5611 - loss: 0.6859 - val_accuracy: 0.5608 - val_loss: 0.6857
Epoch 8/10
668/668 - 59s - 88ms/step - accuracy: 0.5611 - loss: 0.6859 - val_accuracy: 0.5608 - val_loss: 0.6857
Epoch 9/10
668/668 - 81s - 121ms/step - accuracy: 0.5611 - loss: 0.6858 - val_accuracy: 0.56

In [None]:
# Score the trained model on the held-out split; the result unpacks into
# the loss followed by the accuracy metric the model was compiled with
evaluation = model.evaluate(X_test_padded, y_test, verbose=2)
loss, accuracy = evaluation
print("Test Accuracy: {:.4f}".format(accuracy))

167/167 - 4s - 22ms/step - accuracy: 0.5608 - loss: 0.6857
Test Accuracy: 0.5608


In [None]:
# Function to predict sarcasm
def predict_sarcasm(text, threshold=0.5):
    """Classify a single piece of text as sarcastic or not.

    The text is encoded with the notebook's fitted ``tokenizer`` and padded with
    the same ``max_length`` / ``padding_type`` / ``trunc_type`` settings used for
    the training data, then scored by ``model``.

    Args:
        text: The sentence or headline to classify.
        threshold: Probability above which the text is labelled sarcastic.
            Defaults to 0.5.

    Returns:
        "Sarcastic" if the predicted probability is greater than ``threshold``,
        otherwise "Not Sarcastic".
    """
    sequence = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequence, maxlen=max_length, padding=padding_type, truncating=trunc_type)
    # The model returns one row per input with a single sigmoid output, so take
    # the scalar explicitly instead of relying on the truth value of an array.
    # verbose=0 keeps the per-call progress bar out of the cell output.
    probability = float(model.predict(padded, verbose=0)[0][0])
    return "Sarcastic" if probability > threshold else "Not Sarcastic"

# Quick check of the helper on a single hand-written sentence
example_label = predict_sarcasm("Oh great, another Monday morning")
print(example_label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 329ms/step
Not Sarcastic


In [None]:
# Re-read the headlines file and print its first rows as a preview
df = pd.read_json('Sarcasm_Headlines_Dataset.json', lines=True)
first_rows = df.head()
print(first_rows)

                                        article_link  \
0  https://www.huffingtonpost.com/entry/versace-b...   
1  https://www.huffingtonpost.com/entry/roseanne-...   
2  https://local.theonion.com/mom-starting-to-fea...   
3  https://politics.theonion.com/boehner-just-wan...   
4  https://www.huffingtonpost.com/entry/jk-rowlin...   

                                            headline  is_sarcastic  
0  former versace store clerk sues over secret 'b...             0  
1  the 'roseanne' revival catches up to our thorn...             0  
2  mom starting to fear son's web series closest ...             1  
3  boehner just wants wife to listen, not come up...             1  
4  j.k. rowling wishes snape happy birthday in th...             0  


In [None]:
# Classify a handful of hand-written sentences, printing one label per sentence
sample_texts = (
    "Oh great, another Monday morning",
    "I love waking up at 5 AM for work",
    "The weather is so nice today",
    "Thirsty cops raid local bar, arrest 80-year-old woman for feeding stray cats",
)
for sample_text in sample_texts:
    print(predict_sarcasm(sample_text))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Not Sarcastic
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
Not Sarcastic
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
Not Sarcastic
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Not Sarcastic


# New Section