## Aplicando o Método Window no LSTM para Mapeamento das Letras do Alfabeto

In [13]:
# Imports
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import set_random_seed
from keras.utils import to_categorical

# Random seed
# numpy.random.seed alone leaves the Keras backend generator (used for weight
# initialisation) unseeded, so training results still differ between runs.
# set_random_seed seeds Python's `random`, NumPy and the backend in one call.
set_random_seed(7)

In [14]:
# Dataset: the uppercase letters used as the model's vocabulary
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Lookup tables: character -> integer (0-25) and the reverse, integer -> character
char_to_int = {letter: position for position, letter in enumerate(alphabet)}
int_to_char = {position: letter for position, letter in enumerate(alphabet)}

In [15]:
# Build the integer-encoded input/output pairs: each window of seq_length
# consecutive letters is paired with the letter that immediately follows it.
seq_length = 3

# Slice every (window, next letter) pair out of the alphabet first ...
windows = [
    (alphabet[start:start + seq_length], alphabet[start + seq_length])
    for start in range(len(alphabet) - seq_length)
]

# ... then encode both sides as integers via char_to_int.
dataX = [[char_to_int[char] for char in window] for window, _ in windows]
dataY = [char_to_int[target] for _, target in windows]

# Show the raw pairs so the mapping can be checked by eye.
for seq_in, seq_out in windows:
    print(seq_in, '->', seq_out)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


In [16]:
# Arrange X as [samples, time steps, features]: a single time step per sample,
# with the seq_length letters of the window as that step's features.
X = numpy.array(dataX).reshape(len(dataX), 1, seq_length)

# Normalisation: scale the integer codes by the alphabet size.
X = X / float(len(alphabet))

In [18]:
# One-hot encode the output variable.
# num_classes is pinned to the alphabet size so that column k always lines up
# with int_to_char[k]; left unset, to_categorical infers the width from
# max(dataY) + 1, which only equals the vocabulary size while the last letter
# happens to appear among the targets.
y = to_categorical(dataY, num_classes=len(alphabet))

In [19]:
# Build, compile and fit the model:
# one LSTM layer (32 units) over the [time steps, features] input, followed by
# a softmax layer with one output per one-hot column of y.
model = Sequential()
model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Bind the History returned by fit: it keeps the per-epoch loss/accuracy
# available for later inspection and stops the cell from echoing an object
# repr (with a run-specific memory address) as its output.
history = model.fit(X, y, epochs=500, batch_size=1, verbose=2)

Epoch 1/500
23/23 - 2s - loss: 3.2618 - accuracy: 0.0000e+00 - 2s/epoch - 82ms/step
Epoch 2/500
23/23 - 0s - loss: 3.2499 - accuracy: 0.0435 - 51ms/epoch - 2ms/step
Epoch 3/500
23/23 - 0s - loss: 3.2427 - accuracy: 0.0435 - 46ms/epoch - 2ms/step
Epoch 4/500
23/23 - 0s - loss: 3.2360 - accuracy: 0.0435 - 51ms/epoch - 2ms/step
Epoch 5/500
23/23 - 0s - loss: 3.2300 - accuracy: 0.0435 - 45ms/epoch - 2ms/step
Epoch 6/500
23/23 - 0s - loss: 3.2221 - accuracy: 0.0435 - 48ms/epoch - 2ms/step
Epoch 7/500
23/23 - 0s - loss: 3.2151 - accuracy: 0.0435 - 48ms/epoch - 2ms/step
Epoch 8/500
23/23 - 0s - loss: 3.2069 - accuracy: 0.0435 - 50ms/epoch - 2ms/step
Epoch 9/500
23/23 - 0s - loss: 3.1993 - accuracy: 0.0435 - 47ms/epoch - 2ms/step
Epoch 10/500
23/23 - 0s - loss: 3.1900 - accuracy: 0.0435 - 48ms/epoch - 2ms/step
Epoch 11/500
23/23 - 0s - loss: 3.1813 - accuracy: 0.0435 - 52ms/epoch - 2ms/step
Epoch 12/500
23/23 - 0s - loss: 3.1721 - accuracy: 0.0435 - 50ms/epoch - 2ms/step
Epoch 13/500
23/23 - 0

<keras.src.callbacks.History at 0x788fc135c220>

In [20]:
# Model performance, measured on the same data the model was trained on.
# scores[1] is the 'accuracy' metric requested in compile(); report it as a percentage.
scores = model.evaluate(X, y, verbose=0)
accuracy_pct = scores[1] * 100
print("\nAcurácia do Modelo: %.2f%%" % accuracy_pct)


Acurácia do Modelo: 82.61%


In [21]:
# Predictions
# Score every window with one batched predict call instead of calling
# model.predict once per pattern. X already holds the windows of dataX,
# reshaped and normalised exactly as they were for training and in the same
# order, so the per-sample reshape/normalise step does not need repeating.
predictions = model.predict(X, verbose=0)

# Row-wise argmax gives the most probable class index for each window.
for pattern, index in zip(dataX, numpy.argmax(predictions, axis=1)):
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, "->", result)

['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O', 'P', 'Q'] -> R
['P', 'Q', 'R'] -> S
['Q', 'R', 'S'] -> T
['R', 'S', 'T'] -> U
['S', 'T', 'U'] -> W
['T', 'U', 'V'] -> X
['U', 'V', 'W'] -> Y
['V', 'W', 'X'] -> Z
['W', 'X', 'Y'] -> Z
