In [1]:
from my_ai_utils import *

import keras
keras.__version__
import tensorflow as tf
from tensorflow.keras import layers, models, losses
from keras.datasets import imdb
from keras.utils.data_utils import pad_sequences 
import matplotlib.pyplot as plt

In [2]:
# --- Dataset configuration -------------------------------------------------
# Vocabulary size: passed to imdb.load_data as num_words.
max_features = 10000
# Fixed sequence length: passed to pad_sequences as maxlen.
maxlen = 500

# --- Load the raw IMDB splits ----------------------------------------------
print('Loading data...')
(input_train, y_train), (input_test, y_test) = imdb.load_data(num_words=max_features)
print(input_train.shape, 'train sequences')
print(input_test.shape, 'test sequences')

# --- Bring every review to the same length (samples x time) ----------------
print('Pad sequences (samples x time)')
input_train, input_test = (
    pad_sequences(split, maxlen=maxlen) for split in (input_train, input_test)
)

# Quick sanity check of the resulting shapes and of the first label.
print('input_train shape:', input_train.shape)
print('y shape:', y_train.shape)
print(y_train[0])


Loading data...


In [None]:
# Seed NumPy's global RNG before the custom layers are created.
np.random.seed(42)
experimental_input_train, experimental_y_train = input_train, y_train

# Custom implementation: Embedding -> RNN (many-to-one) -> Dense(sigmoid).
# Layers are constructed and added strictly one after another.
model2 = Sequential(usage=Usage.logisticRegression)
model2.add_layer(Embedding(input_dim=max_features, output_dim=2, seq_length=maxlen))
model2.add_layer(RNN(in_features=2, hidden_features=4, architecture="many_to_one"))
model2.add_layer(Dense(in_features=4, out_features=1, activation="sigmoid"))

# Capture the freshly initialised parameters of each layer so that another
# model can be started from exactly the same values.
_emb_layer, _rnn_layer, _dense_layer = model2.layers[0], model2.layers[1], model2.layers[2]
embedding_w = _emb_layer.kernel
rnn_w = [_rnn_layer.Wxa, _rnn_layer.Waa, _rnn_layer.ba]
dense_w = [_dense_layer.kernel, _dense_layer.biases]

# Alternative learning-rate schedulers (currently disabled):
# lr = Warmup(target_lr=0.1, warm_steps=20)
# lr = CosineDecay(initial_lr=0.1, alpha=0.001, warmup=False, warmup_steps=100, hold_steps=200)
# lr = ExponentialDecay(initial_lr=0.01, decay_rate=0.2, warmup=True, warmup_steps=5, hold_steps=5)

model2.compile(loss_fn=Loss("l2"), optimizer=RMSprop(beta=0.9, lr=0.01))

# Show the starting parameters (only the first five embedding rows).
for _label, _value in (
    ("Initial for dense", model2.layers[-1].kernel),
    ("Initial for rnn", model2.layers[1].Wxa),
    ("Initial for rnn", model2.layers[1].Waa),
    ("Initial for embedding", model2.layers[0].kernel[0: 5]),
):
    print(_label, _value)

In [None]:
# Keras reference model: Embedding -> SimpleRNN(tanh) -> Dense(sigmoid),
# started from the parameters captured in embedding_w / rnn_w / dense_w.
np.random.seed(42)
# Keras draws its randomness (e.g. batch shuffling in fit) from TensorFlow's
# RNG, which np.random.seed does not touch, so seed it as well.
tf.random.set_seed(42)

model1 = models.Sequential()
model1.add(layers.Embedding(max_features, 2))
model1.add(layers.SimpleRNN(4, activation="tanh"))
model1.add(layers.Dense(1, activation='sigmoid'))

# A Sequential model created without an input shape defers variable creation,
# so its layers own no weights yet and set_weights() would raise a ValueError
# about the weight count. Build explicitly for (batch, maxlen) inputs first.
model1.build(input_shape=(None, maxlen))

# Copy the externally captured parameters into the Keras layers.
# NOTE(review): assumes each array already has the shape Keras expects
# (kernel, recurrent kernel, bias order for SimpleRNN) — confirm.
model1.layers[0].set_weights([embedding_w])
model1.layers[1].set_weights(rnn_w)
model1.layers[2].set_weights(dense_w)

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.01)
model1.compile(optimizer=optimizer, loss='mse', metrics=['acc'])

# Snapshot the starting weights and print them (first five embedding rows only).
embedding_weights = model1.layers[0].get_weights()
dense_weights = model1.layers[2].get_weights()
rnn_weights = model1.layers[1].get_weights()
print("Initial for dense ", dense_weights[0])
print("Initial for rnn ", rnn_weights[0])
print("Initial for rnn ", rnn_weights[1])
print("Initial for embedding ", embedding_weights[0][0:5])

In [None]:
# Train the custom model for 10 epochs with mini-batches of 128 samples.
history = model2.train(input_train, y_train, nepochs=10, batch_size=128)

# Print the parameters after training (first five embedding rows only).
for _label, _value in (
    ("After for dense", model2.layers[-1].kernel),
    ("After for rnn", model2.layers[1].Wxa),
    ("After for embedding", model2.layers[0].kernel[0: 5]),
):
    print(_label, _value)

In [None]:
# Train the Keras model for 10 epochs with mini-batches of 128 samples.
# NOTE: this rebinds `history`, replacing the value assigned when model2
# was trained.
history = model1.fit(input_train, y_train, epochs=10, batch_size=128)

# These values are read after fit(), so label them "After" (they were
# previously mislabelled "Initial"). Only the first five embedding rows
# are shown.
print("After for dense ", model1.layers[2].get_weights()[0])
print("After for rnn ", model1.layers[1].get_weights()[0])
print("After for rnn ", model1.layers[1].get_weights()[1])
print("After for embedding ", model1.layers[0].get_weights()[0][0:5])

In [None]:
# Evaluate the custom model (model2) on the padded test inputs and labels.
# NOTE(review): the structure of the returned value is defined by
# my_ai_utils and is not visible here — confirm before using eval_metrics.
eval_metrics = model2.evaluate(input_test, y_test)

In [None]:
# Run the Keras model (model1) on the padded test inputs.
y_pred = model1.predict(input_test)
# Disabled ad-hoc checks: count of confident predictions, conversion to
# hard 0/1 labels, and the predicted vs. actual positive rate.
#print((y_pred < 0.3).sum() + (y_pred > 0.7).sum())
#y_pred = (y_pred > 0.5).astype(int)  # Convert probabilities to binary predictions
#print(y_pred.sum() / len(y_pred))
#print(y_test.sum() / len(y_test))
# NOTE(review): y_pred is passed on exactly as returned by predict() because
# the thresholding line above is commented out — confirm that Accuracy and
# ConfusionMatrix (presumably provided by the my_ai_utils star-import) accept
# this and threshold it themselves.
# NOTE(review): Accuracy is not the last expression of the cell, so any
# value it returns is not displayed — confirm it prints its result.
Accuracy(y_pred, y_test)
ConfusionMatrix(y_pred, y_test, classes=[0, 1], usage=Usage.logisticRegression)