<a href="https://colab.research.google.com/github/SeokhunEom/DeepLearning-study/blob/main/Section_12_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np  # Importing numpy for array manipulations
import tensorflow as tf  # Importing TensorFlow for building and training the neural network

In [2]:
# Vocabulary for the toy task: a character's position in this list is its class index.
idx2char = list("hielo")

# Target sequence 'ihello', encoded as vocabulary indices and wrapped in a
# batch dimension of size one -> [[1, 0, 2, 3, 3, 4]].
y_data = [[idx2char.index(ch) for ch in "ihello"]]

In [3]:
# Hyper-parameters for the 'hihell' -> 'ihello' toy problem.
sequence_length = 6  # number of timesteps; 'ihello' has six characters
input_dim = 5        # width of each one-hot input vector
num_classes = 5      # number of distinct output characters
learning_rate = 0.1  # step size handed to the optimizer

In [4]:
# One-hot encode the input sequence 'hihell' by picking rows of a 5x5 identity
# matrix (h=0, i=1, e=2, l=3), then add a leading batch axis -> shape (1, 6, 5).
x_one_hot = np.eye(5, dtype=np.float32)[[0, 1, 0, 2, 3, 3]][np.newaxis, ...]

In [5]:
# One-hot encode the integer targets so they line up with the softmax output.
y_one_hot = tf.keras.utils.to_categorical(y_data, num_classes=num_classes)

# Sanity check: inputs and targets are printed in that order and should have matching shapes.
for encoded in (x_one_hot, y_one_hot):
    print(encoded.shape)

(1, 6, 5)
(1, 6, 5)


In [6]:
# Build the toy sequence model: one LSTM layer followed by a per-timestep softmax.
# NOTE: the model is stored as an attribute on the `tf` module because the
# later cells in this notebook read it back as `tf.model`.
tf.model = tf.keras.Sequential()

# An LSTMCell describes the computation for a single timestep; the RNN layer
# below unrolls it over the whole sequence.
cell = tf.keras.layers.LSTMCell(units=num_classes)
# The input shape is declared on the RNN layer (the layer that is actually
# added to the model), not on the cell: Sequential only inspects the layer it
# is given, so a shape set on the wrapped cell leaves the model unbuilt.
# return_sequences=True yields one output per timestep rather than only the last.
tf.model.add(tf.keras.layers.RNN(cell=cell, return_sequences=True,
                                 input_shape=(sequence_length, input_dim)))

# Alternatively, a single LSTM layer can be used instead of an LSTMCell
# tf.model.add(tf.keras.layers.LSTM(units=num_classes, input_shape=(sequence_length, input_dim), return_sequences=True))

# Apply the same dense softmax classifier to every timestep's LSTM output.
tf.model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=num_classes, activation='softmax')))

In [7]:
# Configure training: Adam with the notebook's learning rate, categorical
# cross-entropy against the one-hot targets, and accuracy as a metric.
adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
tf.model.compile(optimizer=adam,
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])

# Fit on the single training sequence for 50 epochs.
tf.model.fit(x=x_one_hot, y=y_one_hot, epochs=50)

# Show the layer-by-layer architecture and parameter counts.
tf.model.summary()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rnn (RNN)                   (None, 6, 5)              220       
                                                                 
 time_distributed (TimeDist  (None, 6, 5)              30        
 ributed)                                                  

In [8]:
# Run the trained model on the training input.
predictions = tf.model.predict(x_one_hot)

# Each entry of `predictions` holds the per-timestep class probabilities for one sequence.
for step_probs in predictions:
    print(step_probs)  # raw probability rows
    # Pick the most probable class at every timestep and map it back to a character.
    best_idx = np.argmax(step_probs, axis=1)
    decoded = ''.join(idx2char[k] for k in best_idx)
    print("\tPrediction str: ", decoded)

[[4.6571100e-04 9.9556267e-01 3.7608400e-03 6.9153793e-06 2.0387604e-04]
 [9.9901760e-01 6.5873057e-04 1.4395500e-04 1.7950480e-04 2.2292728e-07]
 [3.1859829e-04 2.4141448e-03 9.9603003e-01 1.2296947e-03 7.4477102e-06]
 [2.3831241e-04 2.7018420e-08 4.1697978e-05 9.9969232e-01 2.7596372e-05]
 [4.7771046e-06 1.6332017e-07 2.3079560e-04 9.9919218e-01 5.7207589e-04]
 [6.9494277e-08 1.2292807e-03 2.1146981e-05 9.7870862e-04 9.9777079e-01]]
	Prediction str:  ihello


In [9]:
# Training text for the character-level model, assembled clause by clause.
sentence = "".join([
    "if you want to build a ship, ",
    "don't drum up people together to collect wood ",
    "and don't assign them tasks and work, ",
    "but rather teach them to long for ",
    "the endless immensity of the sea.",
])

In [10]:
# Collect the distinct characters of the sentence in sorted order.
# Iterating a bare set of strings depends on per-process hash randomisation,
# so list(set(...)) would give a different character -> index mapping on every
# kernel restart; sorting makes the vocabulary deterministic.
char_set = sorted(set(sentence))
# Reverse lookup: character -> its index in char_set.
char_dic = {ch: idx for idx, ch in enumerate(char_set)}

In [11]:
# Model dimensions all follow the vocabulary size: the one-hot input width,
# the hidden-layer size and the number of output classes are the same number.
data_dim = hidden_size = num_classes = len(char_set)

# Training windows are 10 characters long.
sequence_length = 10
# Step size handed to the optimizer.
learning_rate = 0.1

In [12]:
# Slide a window over the sentence to build (input, target) index sequences.
# The target is the input shifted one character to the right.
dataX, dataY = [], []
n_windows = len(sentence) - sequence_length
for start in range(n_windows):
    # Take sequence_length + 1 characters once and split them into the two views.
    window = sentence[start:start + sequence_length + 1]
    x_str, y_str = window[:-1], window[1:]
    print(start, x_str, '->', y_str)  # show each training pair

    # Store both views as lists of vocabulary indices.
    dataX.append([char_dic[ch] for ch in x_str])
    dataY.append([char_dic[ch] for ch in y_str])

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [13]:
# Number of training windows.
# NOTE(review): the training cell calls fit() without a batch_size argument,
# so this value appears to be unused there -- confirm whether that is intended.
batch_size = len(dataX)

# Expand the integer index sequences into one-hot vectors of width num_classes.
X_one_hot = tf.one_hot(indices=dataX, depth=num_classes)
Y_one_hot = tf.one_hot(indices=dataY, depth=num_classes)

# Both shapes should read (number of sequences, sequence length, number of unique characters).
for encoded in (X_one_hot, Y_one_hot):
    print(encoded.shape)

(170, 10, 25)
(170, 10, 25)


In [14]:
# Width of each one-hot input vector, read from the encoded data itself.
feature_dim = X_one_hot.shape[2]

# Two stacked LSTM layers followed by a per-timestep softmax classifier.
# Both LSTMs return the full sequence: the first feeds the second, and the
# second feeds the time-distributed dense layer one timestep at a time.
tf.model = tf.keras.Sequential([
    tf.keras.layers.LSTM(units=num_classes,
                         input_shape=(sequence_length, feature_dim),
                         return_sequences=True),
    tf.keras.layers.LSTM(units=num_classes, return_sequences=True),
    tf.keras.layers.TimeDistributed(
        tf.keras.layers.Dense(units=num_classes, activation='softmax')),
])

In [15]:
# Show the layer-by-layer architecture and parameter counts before training.
tf.model.summary()

# Configure training: Adam with the notebook's learning rate, categorical
# cross-entropy against the one-hot targets, and accuracy as a metric.
adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
tf.model.compile(optimizer=adam,
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])

# Fit on all one-hot encoded windows for 100 epochs.
tf.model.fit(x=X_one_hot, y=Y_one_hot, epochs=100)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 25)            5100      
                                                                 
 lstm_1 (LSTM)               (None, 10, 25)            5100      
                                                                 
 time_distributed_1 (TimeDi  (None, 10, 25)            650       
 stributed)                                                      
                                                                 
Total params: 10850 (42.38 KB)
Trainable params: 10850 (42.38 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoc

<keras.src.callbacks.History at 0x7b3f109221a0>

In [16]:
# Run the trained model on every training window.
results = tf.model.predict(X_one_hot)

# Stitch the overlapping window predictions back into one sentence: the first
# window contributes all of its characters, every later window only its last.
decoded = []
for window_no, window_probs in enumerate(results):
    best = np.argmax(window_probs, axis=1)  # most probable class per position
    kept = best if window_no == 0 else best[-1:]
    decoded.extend(char_set[k] for k in kept)
print(''.join(decoded), end='')

t you want to build a ship, don't drum up people together to collect wood and don't dssign them tasks and work, but rather teach them to long for the endless immensity of the sea.