In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense

# Define 10 restaurant reviews
reviews = [
    'Never coming back!',
    'horrible service',
    'rude waitress',
    'cold food',
    'horrible food!',
    'awesome',
    'awesome services!',
    'rocks',
    'poor work',
    "couldn't have done better"
]

# Define labels, aligned index-for-index with `reviews`.
# Convention in this cell: 1 = negative review, 0 = positive review.
# 'poor work' (index 8) is a complaint, so it is labelled 1 like the other
# negative reviews rather than being grouped with the praise.
labels = np.array([1, 1, 1, 1, 1, 0, 0, 0, 1, 0])
assert len(labels) == len(reviews), "every review needs exactly one label"

# One vocabulary budget shared by the tokenizer and the embedding table, so
# the two can never drift apart.
vocab_size = 50

# Create a tokenizer and learn the word index from the reviews
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(reviews)

# Convert text to sequences of integer word ids
sequences = tokenizer.texts_to_sequences(reviews)

# Pad sequences to the longest review (4 tokens for the data above) instead of
# a hard-coded length, so a longer review is never silently truncated.
max_length = max(len(seq) for seq in sequences)
padded_reviews = pad_sequences(sequences, maxlen=max_length, padding='post')

# Create the model: embedding -> flatten -> single sigmoid unit
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=8, input_length=max_length))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

# Print model summary. summary() prints the table itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the table.
model.summary()

# Train the model
model.fit(padded_reviews, labels, epochs=100, verbose=0)

# Check the shape of the embedding weights: (vocab_size, output_dim)
print(model.layers[0].get_weights()[0].shape)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 4, 8)              400       
                                                                 
 flatten (Flatten)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 433 (1.69 KB)
Trainable params: 433 (1.69 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
(50, 8)


In [2]:
from numpy import array
import tensorflow as tf
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Embedding, Dense

In [3]:
# Define 10 restaurant reviews
reviews =[
          'Never coming back!',
          'horrible service',
          'rude waitress',
          'cold food',
          'horrible food!',
          'awesome',
          'awesome services!',
          'rocks',
          'poor work',
          'couldn\'t have done better'
]
# Define labels, aligned index-for-index with `reviews`.
# Convention: 1 = positive review, 0 = negative review.
# 'cold food' (index 3) is a complaint, so it is labelled 0 like the other
# negative reviews.
labels = array([0,0,0,0,0,1,1,1,0,1])
assert len(labels) == len(reviews), "every review needs exactly one label"

In [4]:
def tokenize_and_pad(docs, vocab_size=50):
    """Hash-encode each document into integer word ids and pad to equal length.

    Args:
        docs: Iterable of text strings, one per document.
        vocab_size: Size of the hashing space; each word is mapped to an
            integer id below this value. Distinct words may collide on the
            same id because this is a hash, not a learned vocabulary.

    Returns:
        The array produced by ``pad_sequences``: one row per document,
        zero-padded at the end ('post') up to the length of the longest
        encoded document. Empty input yields an array with zero rows.
    """
    # Local import keeps this cell self-contained; hashing_trick lives in the
    # same module as one_hot.
    from tensorflow.keras.preprocessing.text import hashing_trick

    # one_hot() hashes with Python's built-in hash(), which is salted per
    # process, so word ids would change on every kernel restart. 'md5' gives
    # the same ids on every run.
    encoded_docs = [
        hashing_trick(doc, vocab_size, hash_function='md5') for doc in docs
    ]

    # default=0 avoids a ValueError from max() when `docs` is empty.
    longest = max((len(ids) for ids in encoded_docs), default=0)

    return pad_sequences(encoded_docs, maxlen=longest, padding='post')

In [5]:
# Size of the integer id space used to encode words.
vocab_size = 50

# Features are the padded integer sequences; targets are the review labels as-is.
X, y = tokenize_and_pad(reviews, vocab_size=vocab_size), labels

In [6]:
# Model 1: embedding -> flatten -> single sigmoid unit (binary cross-entropy).

# Seed Python, NumPy and TensorFlow so weight initialisation and training
# order are repeatable when this cell is re-run (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

model1 = Sequential()
model1.add(Embedding(vocab_size, 8, input_length=X.shape[-1]))
model1.add(Flatten())
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model1.summary()

model1.fit(X, y, epochs=20, verbose=0)

# Evaluated on the same samples used for training, so this measures fit to
# the training data, not generalisation.
loss, accuracy = model1.evaluate(X, y, verbose=0)
print('Accuracy: %f' % (accuracy*100))

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 4, 8)              400       
                                                                 
 flatten_1 (Flatten)         (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 433 (1.69 KB)
Trainable params: 433 (1.69 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Accuracy: 100.000000


In [7]:
# Model 2: embedding -> flatten -> two-unit softmax, trained with sparse
# categorical cross-entropy on the integer class labels in `y`.

# Seed Python, NumPy and TensorFlow so weight initialisation and training
# order are repeatable when this cell is re-run (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

model2 = Sequential()
model2.add(Embedding(vocab_size, 8, input_length=X.shape[-1]))
model2.add(Flatten())
model2.add(Dense(2, activation='softmax'))

model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model2.summary()

model2.fit(X, y, epochs=20, verbose=0)

# Evaluated on the same samples used for training, so this measures fit to
# the training data, not generalisation.
loss, accuracy = model2.evaluate(X, y, verbose=0)
print('Accuracy: %f' % (accuracy*100))

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 4, 8)              400       
                                                                 
 flatten_2 (Flatten)         (None, 32)                0         
                                                                 
 dense_2 (Dense)             (None, 2)                 66        
                                                                 
Total params: 466 (1.82 KB)
Trainable params: 466 (1.82 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Accuracy: 100.000000


In [8]:
# Model 3: embedding -> flatten -> 4-unit ReLU hidden layer -> two-unit
# softmax, trained with sparse categorical cross-entropy.

# Seed Python, NumPy and TensorFlow so weight initialisation and training
# order are repeatable when this cell is re-run (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

model3 = Sequential()
model3.add(Embedding(vocab_size, 8, input_length=X.shape[-1]))
model3.add(Flatten())
model3.add(Dense(4, activation='relu'))
model3.add(Dense(2, activation='softmax'))

model3.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model3.summary()

model3.fit(X, y, epochs=20, verbose=0)

# Evaluated on the same samples used for training, so this measures fit to
# the training data, not generalisation.
loss, accuracy = model3.evaluate(X, y, verbose=0)
print('Accuracy: %f' % (accuracy*100))

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 4, 8)              400       
                                                                 
 flatten_3 (Flatten)         (None, 32)                0         
                                                                 
 dense_3 (Dense)             (None, 4)                 132       
                                                                 
 dense_4 (Dense)             (None, 2)                 10        
                                                                 
Total params: 542 (2.12 KB)
Trainable params: 542 (2.12 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Accuracy: 100.000000
