## Multilayer Perceptron (MLP)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf




In [2]:
# Load the IMDB sentiment dataset: each review arrives as a list of integer word ids.
(x_train_imdb, y_train_imdb), (x_test_imdb, y_test_imdb) = tf.keras.datasets.imdb.load_data()

# get_word_index() maps word -> rank; the ids in the loaded reviews are offset by 3
# because ids 0, 1 and 2 are used for the special tokens assigned below.
word_index = tf.keras.datasets.imdb.get_word_index()
index2word = {i + 3: word for word, i in word_index.items()}
index2word[0] = '[pad]'
index2word[1] = '[bos]'
index2word[2] = '[oov]'


def decode_reviews(encoded_reviews):
    """Turn integer-encoded reviews back into space-separated text.

    Parameters
    ----------
    encoded_reviews : iterable of iterables of int
        One sequence of word ids per review.

    Returns
    -------
    numpy.ndarray
        1-D object array with one Python ``str`` per review.
    """
    decoded = [
        # Any id missing from the lookup table is rendered as '[oov]' instead of
        # aborting the whole decode with a KeyError.
        ' '.join(index2word.get(idx, '[oov]') for idx in review)
        for review in encoded_reviews
    ]
    # dtype=object stores references to the Python strings. The default would be a
    # fixed-width unicode dtype that pads EVERY review to the length of the longest
    # one (4 bytes per character), which wastes a large amount of memory.
    return np.array(decoded, dtype=object)


x_train_imdb = decode_reviews(x_train_imdb)
x_test_imdb = decode_reviews(x_test_imdb)

In [3]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words presence features:
#   binary=True -> every non-zero term count is stored as 1 (term present / absent).
#   min_df=100  -> terms that occur in fewer than 100 documents are left out of the vocabulary.
# The vocabulary is learned from the training reviews only and then reused for the test reviews.
binary_vectorizer = CountVectorizer(min_df=100, binary=True)
x_train_imdb_binary = binary_vectorizer.fit_transform(x_train_imdb)
x_test_imdb_binary = binary_vectorizer.transform(x_test_imdb)

vocabulary_size = len(binary_vectorizer.vocabulary_)
print('Vocabulary size:', vocabulary_size)

Vocabulary size: 3834


In [4]:
# Convert the sparse bag-of-words matrices into dense NumPy arrays.
# The hasattr guard keeps this cell safe to re-run: after the first execution the
# names already refer to ndarrays, which have no .toarray() method, so an
# unguarded second run would raise AttributeError.
if hasattr(x_train_imdb_binary, 'toarray'):
    x_train_imdb_binary = x_train_imdb_binary.toarray()
if hasattr(x_test_imdb_binary, 'toarray'):
    x_test_imdb_binary = x_test_imdb_binary.toarray()

In [5]:
# Embedding demo: a lookup table with 1000 rows (one per token id) and 5 columns
# (the embedding size). Looking up ids 1, 2 and 3 yields a 3 x 5 float array; the
# layer is untrained here, so the values are just its initial weights.
dummy_embeddings = tf.keras.layers.Embedding(input_dim=1000, output_dim=5)
sample_token_ids = tf.constant([1, 2, 3])
dummy_embeddings(sample_token_ids).numpy()




array([[-0.04668405,  0.00094485,  0.01560987, -0.00757938,  0.01854808],
       [ 0.01599525, -0.00434742, -0.00332546,  0.01437619, -0.02622013],
       [ 0.03637208,  0.04453986, -0.00845218,  0.01072422,  0.03666883]],
      dtype=float32)

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input

# Take the input width from the feature matrix instead of hard-coding the
# vocabulary size, so the model keeps working if the vectorizer settings
# (e.g. min_df) or the dataset change.
n_features = x_train_imdb_binary.shape[1]

# Two hidden ReLU layers of 16 units followed by a single sigmoid unit that
# outputs the probability of the positive class.
model = Sequential()
model.add(Input(shape=(n_features,)))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output / 0-1 labels.
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(x_train_imdb_binary, y_train_imdb, epochs=4, batch_size=512)
# evaluate() returns the test loss followed by the compiled metrics (accuracy).
results = model.evaluate(x_test_imdb_binary, y_test_imdb)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
