In [1]:
# Dataset
# https://drive.google.com/file/d/192jeGRTCZZfet8ufHPfaMn05T7Biklfw/view?usp=sharing

In [2]:
from tensorflow.keras.preprocessing import text_dataset_from_directory

# Assumes you're in the root level of the dataset directory.
# If you aren't, you'll need to change the relative paths here.
train_data = text_dataset_from_directory("movie-reviews-dataset/test")
test_data = text_dataset_from_directory("movie-reviews-dataset/train")



Found 25000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


In [3]:
from tensorflow.keras.preprocessing import text_dataset_from_directory
from tensorflow.strings import regex_replace

def prepareData(dir):
  data = text_dataset_from_directory(dir)
  #for filtering
  return data.map(
    lambda text, label: (regex_replace(text, '<br />', ' '), label),
  )

In [4]:
train_data = prepareData("movie-reviews-dataset/test")
test_data = prepareData("movie-reviews-dataset/train")

Found 25000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


In [5]:
for text_batch, label_batch in train_data.take(1):
    print(text_batch.numpy()[0])
    print("\n")
    print(label_batch.numpy()[0]) # 0 = negative, 1 = positive

b"Sherlock Holmes and the Secret Weapon starts in Switzerland as the world's foremost detective Sherlock Holmes (Basil Rathbone) outwits the Nazi's & manages to smuggle a brilliant scientist named Dr. Franz Tobel (William Post Jr.) out of the country & to the relative safety of London. But is London as safe as Holmes thinks? Dr. Tobel has engineered a revolutionary new bomb sight that will change aerial bombardment forever & he has agreed to give it to the British government, but those Nazi's want it just as badly & Holmes arch enemy Professor Moriarty (Lionel Atwill) plans on stealing the secret of the bomb sight & selling it to the Nazi's. Add the bumbling Inspector Lestrade (Denis Hoey) of Scotland Yard, Dr. Tobel's love interest Charlotte Eberli (Kaaren Verne), assassins, mysterious scientists & a puzzling coded message & Holmes has his work cut out to keep Dr. Tobel alive so he can deliver his bomb sight...  Directed by Roy William Neill Sherlock Holmes and the Secret Weapon was t

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input

model = Sequential()
model.add(Input(shape=(1,), dtype="string"))

In [7]:
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

max_tokens = 1000
max_len = 100
vectorize_layer = TextVectorization(
  # Max vocab size. Any words outside of the max_tokens most common ones
  # will be treated the same way: as "out of vocabulary" (OOV) tokens.
  max_tokens=max_tokens,
  # Output integer indices, one per string token
  output_mode="int",
  # Always pad or truncate to exactly this many tokens
  output_sequence_length=max_len,
)

In [8]:
# Call adapt(), which fits the TextVectorization layer to our text dataset.
# This is when the max_tokens most common words (i.e. the vocabulary) are selected.
train_texts = train_data.map(lambda text, label: text)
vectorize_layer.adapt(train_texts)

In [9]:
model.add(vectorize_layer)

In [10]:
from tensorflow.keras.layers import Embedding
model.add(Embedding(max_tokens + 1, 128))

In [None]:
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout

In [20]:
model.add(Embedding(max_tokens + 1, 128))

# ----- 4. RECURRENT LAYER
model.add(LSTM(64,dropout=0.2,recurrent_dropout=0.2))




# ----- 5. DENSE HIDDEN LAYER
model.add(Dense(64, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(64, activation="relu"))


# ----- 6. OUTPUT
model.add(Dense(1, activation="sigmoid"))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 100)              0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 100, 128)          128128    
                                                                 
 lstm (LSTM)                 (None, 64)                49408     
                                                                 
 embedding_1 (Embedding)     (None, 64, 128)           128128    
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 embedding_2 (Embedding)     (None, 64, 128)           128128    
                                                        

In [21]:
model.compile(
  optimizer='adam',
  loss='binary_crossentropy',
  metrics=['accuracy'],
)

In [22]:
#LSTM
model.fit(train_data, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1f0a573afd0>

In [23]:
model.save('LSTM')



INFO:tensorflow:Assets written to: LSTM\assets


INFO:tensorflow:Assets written to: LSTM\assets


In [24]:
from tensorflow import keras
model_LSTM = keras.models.load_model('LSTM')

In [25]:
# Should print a very high score like 0.98.
print(model_LSTM.predict([
  "i loved it! highly recommend it to anyone and everyone looking for a great movie to watch.",
]))

# Should print a very low score like 0.01.
print(model_LSTM.predict([
  "this was awful! i hated it so much, nobody should watch this. the acting was terrible, the music was terrible, overall it was just bad.",
]))

[[0.5002929]]
[[0.5002929]]


In [11]:
from keras.layers import SimpleRNN
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout
# # build model
model.add(SimpleRNN(128, return_sequences=True))
model.add(SimpleRNN(128, return_sequences=True))
model.add(SimpleRNN(128, return_sequences=False))
model.add(Dense(20))
model.add(Dense(64, activation="relu"))
model.add(Dense(1, activation="sigmoid"))
model.build()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 100)              0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 100, 128)          128128    
                                                                 
 simple_rnn (SimpleRNN)      (None, 100, 128)          32896     
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 100, 128)          32896     
                                                                 
 simple_rnn_2 (SimpleRNN)    (None, 128)               32896     
                                                                 
 dense (Dense)               (None, 20)                2580      
                                                        

In [13]:
model.add(Dense(64, activation="relu"))

In [14]:
model.add(Dense(1, activation="sigmoid"))

In [15]:
model.compile(
  optimizer='adam',
  loss='binary_crossentropy',
  metrics=['accuracy'],
)

In [16]:
#Simple RNN
model.fit(train_data, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2423efd7f70>

In [17]:
model.save('SimpleRNN')

INFO:tensorflow:Assets written to: SimpleRNN\assets


In [18]:
from tensorflow import keras
model = keras.models.load_model('SimpleRNN')

In [19]:
# Should print a very high score like 0.98.
print(model.predict([
  "i loved it! highly recommend it to anyone and everyone looking for a great movie to watch.",
]))

# Should print a very low score like 0.01.
print(model.predict([
  "this was awful! i hated it so much, nobody should watch this. the acting was terrible, the music was terrible, overall it was just bad.",
]))

[[0.5018365]]
[[0.5018365]]
