### Redes neurais recorrentes (RNNs)

In [None]:
# Data handling and numerical libraries used throughout the notebook.
import pandas as pd
import numpy  as np

# Seed NumPy's global random generator with a fixed value.
from numpy.random import seed
seed(1)

# Seed TensorFlow's random generator with a fixed value.
# NOTE(review): a top-level `tensorflow.set_random_seed` looks like the
# TF 1.x API — confirm the installed TensorFlow version still exposes it.
from tensorflow import set_random_seed
set_random_seed(2)

# Keras is imported only after both seeds above have been set.
import keras

In [None]:
# Load the dataset from a CSV file in the working directory.
df = pd.read_csv('iphone6.csv')

In [None]:
# Preview the first rows of the loaded DataFrame.
df.head()

In [None]:
# Inputs are the `message` column; targets are the `label` column.
X = df.message.values

# Work on a copy so the relabelling below does not write through to `df`:
# `.values` can hand back the column's own buffer, and newer pandas
# versions may expose that buffer as read-only.
y = df.label.values.copy()

# Collapse every negative label to 0.
y[y < 0] = 0

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
from keras.preprocessing.text import Tokenizer

# Build the vocabulary from the training texts only.
t = Tokenizer()
t.fit_on_texts(texts=X_train)

In [None]:
print(t.word_counts) # words and their occurrence counts

In [None]:
print(t.document_count) # number of documents

In [None]:
print(t.word_index) # each word and its index in the vocabulary

In [None]:
print(t.word_docs) # each word and the number of documents it appeared in

In [None]:
# Turn each training text into a fixed-length integer sequence for the RNN.

from keras.preprocessing import sequence

# Length that every sequence is padded or truncated to.
max_len = 20

seq_train = sequence.pad_sequences(
    t.texts_to_sequences(X_train),
    maxlen=max_len,
)

In [None]:
# Baseline model: embedding -> single LSTM (64 units) -> sigmoid output.

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.embeddings import Embedding

# The tokenizer numbers words from 1 up to len(word_index) and index 0 is the
# padding value, so the embedding table needs len(word_index) + 1 rows.
# Without the +1 the highest word index falls outside the table.
vocab_size = len(t.word_index) + 1

model = Sequential()
model.add(Embedding(vocab_size, 20, input_length=max_len))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

In [None]:
# Train the model on the padded training sequences: 5 epochs, mini-batches of 16.
model.fit(seq_train, y_train, epochs=5, batch_size=16)

In [None]:
# Encode the held-out texts with the tokenizer fitted on the training split,
# padded/truncated to the same length as the training sequences.
seq_test = sequence.pad_sequences(
    t.texts_to_sequences(X_test),
    maxlen=max_len,
)

# The value at index 1 of the evaluation result is reported as accuracy.
scores = model.evaluate(seq_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

In [None]:
# Same architecture with dropout (rate 0.5) between the LSTM and the output.

from keras.layers import Dropout

# The tokenizer numbers words from 1 up to len(word_index) and index 0 is the
# padding value, so the embedding table needs len(word_index) + 1 rows.
# Without the +1 the highest word index falls outside the table.
vocab_size = len(t.word_index) + 1

model = Sequential()
model.add(Embedding(vocab_size, 20, input_length=max_len))
model.add(LSTM(64))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

In [None]:
# Train the current model: 5 epochs, mini-batches of 16.
model.fit(
    seq_train,
    y_train,
    epochs=5,
    batch_size=16,
)

# Report accuracy on the held-out sequences.
scores = model.evaluate(seq_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

In [None]:
# Stacked LSTM: two recurrent layers on top of the embedding.

# The tokenizer numbers words from 1 up to len(word_index) and index 0 is the
# padding value, so the embedding table needs len(word_index) + 1 rows.
# Without the +1 the highest word index falls outside the table.
vocab_size = len(t.word_index) + 1

model = Sequential()
model.add(Embedding(vocab_size, 20, input_length=max_len))
# return_sequences=True makes the first LSTM emit its output at every
# timestep, which is what the second LSTM consumes.
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

model.fit(seq_train, y_train, epochs=5, batch_size=16)

# Report accuracy on the held-out sequences.
scores = model.evaluate(seq_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# Bidirectional recurrent network: the LSTM is wrapped in Bidirectional.

from keras.layers import Bidirectional

# The tokenizer numbers words from 1 up to len(word_index) and index 0 is the
# padding value, so the embedding table needs len(word_index) + 1 rows.
# Without the +1 the highest word index falls outside the table.
vocab_size = len(t.word_index) + 1

model = Sequential()
model.add(Embedding(vocab_size, 20, input_length=max_len))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

model.fit(seq_train, y_train, epochs=5, batch_size=16)

# Report accuracy on the held-out sequences.
scores = model.evaluate(seq_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
# GRU variant: same layout as the baseline with a GRU in place of the LSTM.

from keras.layers import GRU

# The tokenizer numbers words from 1 up to len(word_index) and index 0 is the
# padding value, so the embedding table needs len(word_index) + 1 rows.
# Without the +1 the highest word index falls outside the table.
vocab_size = len(t.word_index) + 1

model = Sequential()
model.add(Embedding(vocab_size, 20, input_length=max_len))
model.add(GRU(64))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

model.fit(seq_train, y_train, epochs=5, batch_size=16)

# Report accuracy on the held-out sequences.
scores = model.evaluate(seq_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

### Exercício

In [None]:
# Read an SMS spam collection: the first whitespace-separated token of each
# line is the label, the remaining tokens form the message text.
X = []
y = []

# `with` guarantees the file handle is closed, even if parsing fails midway.
# NOTE(review): the file is opened with the platform default encoding —
# confirm the dataset's encoding and pass `encoding=` explicitly if needed.
with open("SMSSpamCollection.txt", "r") as f:
    for line in f:
        tokens = line.split()
        if not tokens:
            # Blank line: there is no label token, so skip it rather than
            # fail with IndexError.
            continue
        X.append(" ".join(tokens[1:]))
        # Encode the label as 1 for "spam" and 0 for anything else.
        y.append(1 if tokens[0] == "spam" else 0)

# Stratify on the label so both splits keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, stratify=y)