In [None]:
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras.preprocessing.text import Tokenizer

sentence = ["I love my dog",
            "I love my cat",
            "You love my dog!"]

tokenizer = Tokenizer(num_words=150)

tokenizer.fit_on_texts(sentence)

word_index = tokenizer.word_index 

print(word_index)


{'love': 1, 'my': 2, 'i': 3, 'dog': 4, 'cat': 5, 'you': 6}


In [None]:
sequences = tokenizer.texts_to_sequences(sentence)
print(sequences)

[[3, 1, 2, 4], [3, 1, 2, 5], [6, 1, 2, 4]]


In [None]:
test_data = ["I love my Laptop",
             "My dog loves my cat"]

test_seq = tokenizer.texts_to_sequences(test_data)
print(test_seq)

[[3, 1, 2], [2, 4, 2, 5]]


In [None]:
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras.preprocessing.text import Tokenizer

sentence = ["I love my dog",
            "I love my cat",
            "You love my dog!"]

tokenizer = Tokenizer(num_words=150, oov_token="<OOV>")

tokenizer.fit_on_texts(sentence)

word_index = tokenizer.word_index 

print(word_index)

sequences = tokenizer.texts_to_sequences(sentence)
print(sequences)


test_data = ["I love my Laptop",
             "My dog loves my cat"]

test_seq = tokenizer.texts_to_sequences(test_data)
print(test_seq)



{'<OOV>': 1, 'love': 2, 'my': 3, 'i': 4, 'dog': 5, 'cat': 6, 'you': 7}
[[4, 2, 3, 5], [4, 2, 3, 6], [7, 2, 3, 5]]
[[4, 2, 3, 1], [3, 5, 1, 3, 6]]


In [None]:
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

sentence = ["I love my dog",
            "I love my cat",
            "You love my dog!"]

tokenizer = Tokenizer(num_words=150, oov_token="<OOV>")

tokenizer.fit_on_texts(sentence)

word_index = tokenizer.word_index 

print(word_index)

sequences = tokenizer.texts_to_sequences(sentence)
print(sequences)

padded = pad_sequences(sequences)
print(padded)

test_data = ["I love my Laptop",
             "My dog loves my cat"]

test_seq = tokenizer.texts_to_sequences(test_data)
print(test_seq)



{'<OOV>': 1, 'love': 2, 'my': 3, 'i': 4, 'dog': 5, 'cat': 6, 'you': 7}
[[4, 2, 3, 5], [4, 2, 3, 6], [7, 2, 3, 5]]
[[4 2 3 5]
 [4 2 3 6]
 [7 2 3 5]]
[[4, 2, 3, 1], [3, 5, 1, 3, 6]]


In [4]:
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

sentence = ["I love my dog",
            "I love my cat",
            "You love my dog!"]

tokenizer = Tokenizer(num_words=150, oov_token="<OOV>")

tokenizer.fit_on_texts(sentence)

word_index = tokenizer.word_index 

print(word_index)

sequences = tokenizer.texts_to_sequences(sentence)
print(sequences)

padded = pad_sequences(sequences, padding="post", truncating="post", maxlen=6)
print(padded)

test_data = ["I love my Laptop",
             "My dog loves my cat"]

test_seq = tokenizer.texts_to_sequences(test_data)
print(test_seq)



{'<OOV>': 1, 'love': 2, 'my': 3, 'i': 4, 'dog': 5, 'cat': 6, 'you': 7}
[[4, 2, 3, 5], [4, 2, 3, 6], [7, 2, 3, 5]]
[[4 2 3 5 0 0]
 [4 2 3 6 0 0]
 [7 2 3 5 0 0]]
[[4, 2, 3, 1], [3, 5, 1, 3, 6]]


In [None]:
#https://www.kaggle.com/datasets/rmisra/news-headlines-dataset-for-sarcasm-detection

<h1>Sarcasm Detector</h1>

In [2]:
#Datasets
!wget https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json

--2022-04-18 14:01:29--  https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json
Resolving storage.googleapis.com (storage.googleapis.com)... 64.233.189.128, 108.177.97.128, 108.177.125.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|64.233.189.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5643545 (5.4M) [application/json]
Saving to: ‘sarcasm.json’


2022-04-18 14:01:29 (115 MB/s) - ‘sarcasm.json’ saved [5643545/5643545]



In [3]:
import json 

with open("sarcasm.json", "r") as f:
  datastore = json.load(f)

sentences = []
labels = []
urls = []

for item in datastore: 
  sentences.append(item["headline"])
  labels.append(item["is_sarcastic"])
  urls.append(item["article_link"])


tokenizers = Tokenizer(num_words=1000, oov_token="<OOV>")

tokenizers.fit_on_texts(sentences)

word_index = tokenizers.word_index

print(word_index)

sequences = tokenizers.texts_to_sequences(sentences)

padding = pad_sequences(sequences)

print(padding[0]) 

print(padding.shape)


NameError: ignored