<a href="https://colab.research.google.com/github/ProfAI/tf00/blob/master/10%20-%20Modelli%20Sequenziali/rnn_lstm_gru.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import numpy as np

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Embedding, SimpleRNN, GRU, LSTM

In [69]:
# --- Text representation ---
# Passed as the tokenizer's num_words and as the Embedding input dimension.
MAX_WORDS = 10_000
# Embedding output dimension.
NUM_EMBEDDING = 50
# Every review is padded/truncated to this many tokens.
SEQ_MAX_LENGTH = 50

# --- Training ---
BATCH_SIZE = 512
# Fraction of the training set that model.fit holds out for validation.
VALIDATION_SPLIT = 0.2

In [70]:
import os
from sklearn.utils import shuffle
import subprocess


def load_imdb(files_path, labels=("pos", "neg"), random_state=None):
    """Load the IMDB reviews found under ``files_path`` with binary targets.

    If ``files_path`` is not an existing directory, the aclImdb archive is
    downloaded into the current working directory (unless it is already
    there) and extracted, so the default ``aclImdb/...`` paths become
    available.

    Parameters
    ----------
    files_path : str
        Directory holding one sub-directory per label, e.g. "aclImdb/train/".
        A trailing slash is optional.
    labels : sequence of two str
        Sub-directory names; ``labels[0]`` is mapped to 1, ``labels[1]`` to 0.
    random_state : int or None
        Forwarded to ``sklearn.utils.shuffle``; pass an int to get the same
        ordering on every run.

    Returns
    -------
    (reviews, y) : tuple
        ``reviews`` holds the raw review texts and ``y`` the matching 0/1
        labels, both shuffled with the same permutation.

    Raises
    ------
    subprocess.CalledProcessError
        If the download or the extraction command fails.
    """
    archive = "aclImdb_v1.tar.gz"
    url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"

    # Key the setup on the data directory, not on the tarball: an archive that
    # was downloaded but never (or only partially) extracted is handled too.
    if not os.path.isdir(files_path):
        if not os.path.isfile(archive):
            subprocess.run(["wget", url], check=True)
        subprocess.run(["tar", "-xf", archive], check=True)

    label_map = {labels[0]: 1, labels[1]: 0}

    reviews = []
    y = []

    for label in labels:
        path = os.path.join(files_path, label)
        # Sorted so that a fixed random_state yields a reproducible order
        # (os.listdir makes no ordering guarantee).
        for file_name in sorted(os.listdir(path)):
            # Context manager closes each file; explicit UTF-8 avoids
            # depending on the platform's default encoding.
            with open(os.path.join(path, file_name), encoding="utf-8") as review_file:
                reviews.append(review_file.read())
            y.append(label_map[label])

    # sklearn's shuffle applies the same permutation to several arrays.
    reviews, y = shuffle(reviews, y, random_state=random_state)

    return (reviews, y)

In [71]:
# Raw texts and 0/1 labels for the two official splits.
reviews_train, y_train = load_imdb("aclImdb/train/")
reviews_test, y_test = load_imdb("aclImdb/test/")

# The vocabulary is fitted on the training texts only.
tokenizer = Tokenizer(num_words=MAX_WORDS)
tokenizer.fit_on_texts(reviews_train)

# Texts -> integer sequences -> fixed-length matrices of SEQ_MAX_LENGTH columns.
X_train = pad_sequences(
    tokenizer.texts_to_sequences(reviews_train),
    maxlen=SEQ_MAX_LENGTH,
)
X_test = pad_sequences(
    tokenizer.texts_to_sequences(reviews_test),
    maxlen=SEQ_MAX_LENGTH,
)

# Labels as NumPy arrays.
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

## Rete Ricorrente Semplice

In [72]:
# Baseline recurrent classifier: embedding -> SimpleRNN -> dropout -> 1 unit.
model = Sequential()

model.add(Embedding(MAX_WORDS, NUM_EMBEDDING, input_length=SEQ_MAX_LENGTH))
model.add(SimpleRNN(32))
model.add(Dropout(0.5))
# The model is trained with binary_crossentropy, which expects a probability.
# Sigmoid keeps the output in (0, 1); the previous ReLU was unbounded above
# and could emit exactly 0. This also matches the other models in the notebook.
model.add(Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential_27"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_28 (Embedding)     (None, 50, 50)            500000    
_________________________________________________________________
simple_rnn_14 (SimpleRNN)    (None, 32)                2656      
_________________________________________________________________
dropout_35 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_36 (Dense)             (None, 1)                 33        
Total params: 502,689
Trainable params: 502,689
Non-trainable params: 0
_________________________________________________________________


In [73]:
# Binary classification: cross-entropy loss, Adam optimizer, accuracy reported.
model.compile(
    optimizer="adam",
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# validation_split holds out part of the training data for validation.
model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f3ee741e320>

## Rete Ricorrente Gated Recurrent Unit (GRU)

In [74]:
# GRU classifier; dropout is applied to the GRU inputs and recurrent state,
# and again on the GRU output before the sigmoid unit.
model = Sequential([
    Embedding(MAX_WORDS, NUM_EMBEDDING, input_length=SEQ_MAX_LENGTH),
    GRU(32, dropout=0.5, recurrent_dropout=0.5),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_28"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_29 (Embedding)     (None, 50, 50)            500000    
_________________________________________________________________
gru_6 (GRU)                  (None, 32)                8064      
_________________________________________________________________
dropout_36 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_37 (Dense)             (None, 1)                 33        
Total params: 508,097
Trainable params: 508,097
Non-trainable params: 0
_________________________________________________________________


In [75]:
# Same training setup as the other models: Adam + binary cross-entropy.
model.compile(
    optimizer="adam",
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f3ee724ff60>

## Rete Ricorrente Long Short-Term Memory (LSTM)

In [76]:
# Single-layer LSTM classifier with input/recurrent dropout of 0.4.
model = Sequential([
    Embedding(MAX_WORDS, NUM_EMBEDDING, input_length=SEQ_MAX_LENGTH),
    LSTM(32, dropout=0.4, recurrent_dropout=0.4),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_29"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_30 (Embedding)     (None, 50, 50)            500000    
_________________________________________________________________
lstm_13 (LSTM)               (None, 32)                10624     
_________________________________________________________________
dropout_37 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_38 (Dense)             (None, 1)                 33        
Total params: 510,657
Trainable params: 510,657
Non-trainable params: 0
_________________________________________________________________


In [77]:
# Compile for binary classification, then train for 5 epochs.
model.compile(
    optimizer="adam",
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f3ef7086d30>

## Rete Ricorrente Profonda

In [78]:
# Deep recurrent classifier: two stacked LSTM layers followed by a dense head.
model = Sequential([
    Embedding(MAX_WORDS, NUM_EMBEDDING, input_length=SEQ_MAX_LENGTH),
    Dropout(0.6),
    # return_sequences=True makes this layer output one vector per timestep,
    # which is the 3-D input the next LSTM layer needs.
    LSTM(32, dropout=0.5, recurrent_dropout=0.5, return_sequences=True),
    LSTM(32, dropout=0.5, recurrent_dropout=0.5),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_30"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_31 (Embedding)     (None, 50, 50)            500000    
_________________________________________________________________
dropout_38 (Dropout)         (None, 50, 50)            0         
_________________________________________________________________
lstm_14 (LSTM)               (None, 50, 32)            10624     
_________________________________________________________________
lstm_15 (LSTM)               (None, 32)                8320      
_________________________________________________________________
dropout_39 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_39 (Dense)             (None, 64)                2112      
_________________________________________________________________
dense_40 (Dense)             (None, 1)               

In [79]:
# Same setup as the shallower models, but trained for 10 epochs instead of 5.
model.compile(
    optimizer="adam",
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f3ee73ad0f0>