In [114]:
# Load the whole book into memory as a single string
with open('data/goblet_book.txt', 'r') as file:
    book_data = file.read()

# Alphabet: every distinct character of the book, in sorted order
book_chars = sorted(set(book_data))
K = len(book_chars)  # size of the one-hot input/output vectors of the RNN

# Lookup tables between characters and their integer indices
ind_to_char = dict(enumerate(book_chars))
char_to_ind = dict(zip(book_chars, range(K)))

# Round-trip sanity check: encode the word "hello" and decode it again
hello = 'hello'
hello_ind = [char_to_ind[letter] for letter in hello]
print(hello_ind)
print([ind_to_char[index] for index in hello_ind])


[58, 55, 62, 62, 65]
['h', 'e', 'l', 'l', 'o']


In [115]:
import numpy as np

# Hyper-parameters read by the cells below
m = 100  # dimensionality of the hidden state (number of hidden units)
eta = 0.1  # learning rate applied to every parameter update
seq_length = 25  # number of characters in each training input sequence

# Initialize the RNN's parameters


class RNN:
    """Parameters and settings of a vanilla (tanh) recurrent network.

    Attributes:
        W: (hidden_size, hidden_size) hidden-to-hidden weights.
        U: (hidden_size, input_size) input-to-hidden weights.
        b: (hidden_size, 1) hidden bias, initialised to zero.
        V: (output_size, hidden_size) hidden-to-output weights.
        c: (output_size, 1) output bias, initialised to zero.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1, seq_length=25, sigma=0.01):
        """Store the sizes/settings and draw the initial weights.

        Every weight matrix is drawn from a standard normal distribution and
        divided by the square root of its number of columns (its fan-in).

        Args:
            input_size: length of each input vector.
            hidden_size: number of hidden units.
            output_size: length of each output vector.
            learning_rate: stored on the instance as ``learning_rate``.
            seq_length: stored on the instance as ``seq_length``.
            sigma: accepted by the signature but not used by this initialiser.
        """
        # Sizes and training settings
        self.input_size, self.hidden_size, self.output_size = input_size, hidden_size, output_size
        self.learning_rate, self.seq_length = learning_rate, seq_length

        def fan_in_scaled(rows, cols):
            # Standard-normal matrix divided by sqrt(number of columns)
            return np.random.randn(rows, cols) / np.sqrt(cols)

        # Random draws happen in the order W, U, V
        self.W = fan_in_scaled(hidden_size, hidden_size)
        self.U = fan_in_scaled(hidden_size, input_size)
        self.b = np.zeros((hidden_size, 1))
        self.V = fan_in_scaled(output_size, hidden_size)
        self.c = np.zeros((output_size, 1))


# Build the network from the hyper-parameters already set at the top of this
# cell (m, eta, seq_length). They are not re-declared here so that there is a
# single place to change them. Input and output sizes both equal the alphabet
# size K.
rnn = RNN(K, m, K, learning_rate=eta, seq_length=seq_length)


In [116]:
def synthesize(rnn, h0, x0, n):
    """Sample a sequence of n one-hot vectors from the network.

    At every step the hidden state is updated with the tanh recurrence, the
    output scores are turned into probabilities with a softmax, one index is
    drawn from that distribution with ``np.random.choice``, and its one-hot
    vector becomes the next input.

    Args:
        rnn: object exposing W, U, b, V, c, input_size and output_size.
        h0: initial hidden state; flattened, so (m, 1) or (m,) both work.
        x0: first input vector; flattened, so (K, 1) or (K,) both work.
        n: number of vectors to generate.

    Returns:
        Array of shape (rnn.output_size, n); column t is the one-hot vector
        sampled at step t.
    """
    h = np.asarray(h0).ravel()
    x = np.asarray(x0).ravel()
    # Flatten the biases once instead of on every step.
    b = rnn.b.ravel()
    c = rnn.c.ravel()
    Y = np.zeros((rnn.output_size, n))

    for t in range(n):
        # Hidden-state update
        h = np.tanh(np.dot(rnn.W, h) + np.dot(rnn.U, x) + b)

        # Output scores -> probabilities. Subtracting the maximum leaves the
        # softmax unchanged but keeps exp() from overflowing to inf, which
        # would otherwise produce NaN probabilities and make the sampling fail.
        o = np.dot(rnn.V, h) + c
        exp_o = np.exp(o - np.max(o))
        p = exp_o / np.sum(exp_o)

        # Draw the next character index from the predicted distribution
        ix = np.random.choice(rnn.output_size, p=p)

        # The sampled character is both recorded and fed back as next input
        x = np.zeros(rnn.input_size)
        x[ix] = 1
        Y[:, t] = x

    return Y

def one_hot_seq_to_char_seq(one_hot_seq, ind_to_char):
    """Decode a one-hot matrix into a string.

    Each column of ``one_hot_seq`` encodes one character; the position of the
    column's largest entry is looked up in ``ind_to_char``.
    """
    decoded = []
    for col in range(one_hot_seq.shape[1]):
        hot_index = np.argmax(one_hot_seq[:, col])
        decoded.append(ind_to_char[hot_index])
    return ''.join(decoded)

# Sample 100 characters from the network, starting from an all-zero hidden
# state and an all-zero input vector, then decode and print them.
n = 100
h0, x0 = np.zeros((m, 1)), np.zeros((K, 1))

Y = synthesize(rnn, h0=h0, x0=x0, n=n)
text = one_hot_seq_to_char_seq(Y, ind_to_char)
print(text)


a.iDk9apKwVB})w(ün1	k-nqwNN•iYiURM;S0Y79OKYu/i0}kx)h/RLXr,fVPCm•e(.rDjuct'W6,Cdv(U
eKkMN- cIu
olp03F


In [119]:

def forward_backward_pass(rnn, X_chars, Y_chars, h0, char_map=None):
    """Run one forward and backward pass over a character sequence.

    The inputs and targets are one-hot encoded, the tanh recurrence and
    softmax output are evaluated for every step, and the gradients of the
    summed cross-entropy loss are obtained by back-propagation through time.

    The network object is only read: ``rnn.b`` and ``rnn.c`` keep the shape
    they had on entry, and ``grads['b']`` / ``grads['c']`` have those same
    shapes.

    Args:
        rnn: object exposing the parameter arrays W, U, V, b and c.
        X_chars: sequence of input characters.
        Y_chars: sequence of target characters, same length as X_chars.
        h0: initial hidden state; flattened with ``ravel`` before use.
        char_map: optional character -> index mapping. When omitted, the
            notebook-level ``char_to_ind`` is used.

    Returns:
        (loss, grads): the summed cross-entropy loss and a dict with keys
        'U', 'W', 'V', 'b', 'c' holding the gradients, clipped to [-5, 5].

    Raises:
        ValueError: if X_chars and Y_chars differ in length.
    """
    # The global is only looked up when no mapping is passed explicitly.
    mapping = char_to_ind if char_map is None else char_map

    n_steps = len(X_chars)
    if len(Y_chars) != n_steps:
        raise ValueError("X_chars and Y_chars must have the same length")

    n_out = len(rnn.c)
    n_hidden = rnn.b.shape[0]

    # Flattened local views of the biases; the attributes on `rnn` are never
    # rebound, so the caller's parameter shapes are left untouched.
    b_flat = rnn.b.ravel()
    c_flat = rnn.c.ravel()

    # Convert the characters to one-hot encodings (one column per step)
    X = np.zeros((n_out, n_steps))
    Y = np.zeros((n_out, n_steps))
    for t in range(n_steps):
        X[mapping[X_chars[t]], t] = 1
        Y[mapping[Y_chars[t]], t] = 1

    # h[:, t] is the state before step t; h[:, t + 1] the state after it
    h = np.zeros((n_hidden, n_steps + 1))
    h[:, 0] = h0.ravel()
    o = np.zeros((n_out, n_steps))

    # Forward pass
    for t in range(n_steps):
        a = np.dot(rnn.W, h[:, t]) + np.dot(rnn.U, X[:, t]) + b_flat
        h[:, t + 1] = np.tanh(a)
        o[:, t] = np.dot(rnn.V, h[:, t + 1]) + c_flat

    # Log-softmax with the column maximum subtracted: mathematically the same
    # as log(exp(o) / sum(exp(o))) but exp() cannot overflow and the log of a
    # probability that underflowed to zero is never taken.
    shifted = o - np.max(o, axis=0, keepdims=True)
    log_p = shifted - np.log(np.sum(np.exp(shifted), axis=0, keepdims=True))
    p = np.exp(log_p)
    loss = -np.sum(Y * log_p)  # cross-entropy loss summed over all steps

    # Initialize the gradients
    grads = {
        'U': np.zeros_like(rnn.U),
        'W': np.zeros_like(rnn.W),
        'V': np.zeros_like(rnn.V),
        'b': np.zeros_like(rnn.b),
        'c': np.zeros_like(rnn.c)
    }

    # Backward pass (back-propagation through time)
    dh_next = np.zeros(n_hidden)
    for t in reversed(range(n_steps)):
        do = p[:, t] - Y[:, t]  # gradient w.r.t. the output scores
        grads['V'] += np.outer(do, h[:, t + 1])
        grads['c'] += do.reshape(rnn.c.shape)

        # Gradient reaching h[t + 1]: from this step's output and from step t + 1
        dh = np.dot(rnn.V.T, do) + dh_next
        da = (1 - h[:, t + 1] ** 2) * dh  # through tanh

        grads['U'] += np.outer(da, X[:, t])
        grads['W'] += np.outer(da, h[:, t])
        grads['b'] += da.reshape(rnn.b.shape)

        dh_next = np.dot(rnn.W.T, da)

    # Clip the gradients to avoid exploding gradient problem
    for grad in grads.values():
        np.clip(grad, -5, 5, out=grad)

    return loss, grads



In [124]:
# First training pair: the inputs are the first seq_length characters of the
# book, the targets are the same window shifted one character ahead.
X_chars = book_data[:seq_length]
Y_chars = book_data[1:1 + seq_length]


def one_hot_encode(sequence, K, char_map=None):
    """One-hot encode a character sequence, one row per character.

    Args:
        sequence: iterable of characters.
        K: length of each one-hot vector (alphabet size).
        char_map: optional character -> index mapping. When omitted, the
            notebook-level ``char_to_ind`` is used.

    Returns:
        Float array of shape (len(sequence), K). An empty sequence yields an
        array of shape (0, K).
    """
    # The global is only looked up when no mapping is passed explicitly.
    mapping = char_to_ind if char_map is None else char_map

    indices = np.array([mapping[ch] for ch in sequence], dtype=np.intp)

    # Write the ones directly instead of building a K x K identity matrix on
    # every call; this also keeps the second dimension for empty input.
    encoded_sequence = np.zeros((indices.size, K))
    encoded_sequence[np.arange(indices.size), indices] = 1

    return encoded_sequence

# Encode both windows as (sequence length, K) one-hot matrices
X_chars_encoded, Y_chars_encoded = (one_hot_encode(chars, K) for chars in (X_chars, Y_chars))

# Show the raw input text for reference
print(X_chars)

def char_seq_to_one_hot(char_seq, char_to_ind, K):
    """One-hot encode a character sequence, one column per character.

    Returns a float array of shape (K, len(char_seq)) in which column i has a
    single 1 in the row given by ``char_to_ind[char_seq[i]]``.
    """
    one_hot_seq = np.zeros((K, len(char_seq)))
    rows = np.array([char_to_ind[ch] for ch in char_seq], dtype=np.intp)
    cols = np.arange(rows.size)
    # Set every (row, column) pair in one indexed assignment
    one_hot_seq[rows, cols] = 1
    return one_hot_seq
# Show the two encodings of the same text one after the other:
# one_hot_encode stores one character per row, char_seq_to_one_hot one per column.
X_ = char_seq_to_one_hot(X_chars, char_to_ind, K)
print(X_chars_encoded, "________", X_, sep="\n")

# One plain gradient-descent step on the first training window
loss, grads = forward_backward_pass(rnn, X_chars, Y_chars, h0)
for param, grad in grads.items():
    param_value = getattr(rnn, param)
    param_value -= eta * grad  # in-place update of the parameter array
    setattr(rnn, param, param_value)

print(loss)



HARRY POTTER AND THE GOBL
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


NameError: name 'pri' is not defined

In [None]:
from tqdm import tqdm
def _final_hidden_state(rnn, X_chars, char_to_ind, h0):
    """Run the tanh recurrence over X_chars and return the last hidden state as a column."""
    h = h0.ravel()
    b = rnn.b.ravel()
    for ch in X_chars:
        # U times a one-hot vector is just the matching column of U.
        h = np.tanh(np.dot(rnn.W, h) + rnn.U[:, char_to_ind[ch]] + b)
    return h.reshape(-1, 1)


def train_rnn(rnn, book_data, char_to_ind, ind_to_char, n_epochs):
    """Train the network on consecutive character windows with AdaGrad.

    The text is consumed in windows of ``rnn.seq_length`` characters; the
    targets are the inputs shifted by one character. The hidden state at the
    end of a window is the initial state of the next one and is reset to zero
    whenever the position wraps back to the start of the text. Parameters are
    updated in place.

    Args:
        rnn: network exposing W, U, V, b, c, output_size, hidden_size,
            seq_length and learning_rate.
        book_data: training text.
        char_to_ind: character -> index mapping.
        ind_to_char: index -> character mapping, used to decode samples.
        n_epochs: number of passes over the text.

    Returns:
        The same ``rnn`` object, after training.

    Raises:
        ValueError: if book_data is too short to form one input/target pair.
    """
    K = rnn.output_size
    seq_length = rnn.seq_length
    if len(book_data) < seq_length + 1:
        raise ValueError("book_data must contain at least seq_length + 1 characters")

    iter_per_epoch = len(book_data) // seq_length
    updates = n_epochs * iter_per_epoch
    smooth_loss = -np.log(1.0 / K) * seq_length  # loss at iteration 0
    hprev = np.zeros((rnn.hidden_size, 1))
    # AdaGrad accumulators, created lazily so that each one matches the shape
    # of the parameter it belongs to (all five parameters, biases included).
    ada_params = {}

    e = 0  # 0-based start of the current window
    for update in tqdm(range(updates)):
        if e + seq_length + 1 > len(book_data):
            e = 0
            hprev = np.zeros((rnn.hidden_size, 1))  # reset RNN memory

        X_chars = book_data[e:e + seq_length]
        Y_chars = book_data[e + 1:e + seq_length + 1]

        # forward_backward_pass takes the raw characters, not one-hot arrays
        loss, grads = forward_backward_pass(rnn, X_chars, Y_chars, hprev)

        # Hidden state at the end of this window, computed with the same
        # (pre-update) parameters that the pass above used.
        hnext = _final_hidden_state(rnn, X_chars, char_to_ind, hprev)

        # Update the parameters using Adagrad
        for param, grad in grads.items():
            param_value = getattr(rnn, param)
            grad = grad.reshape(param_value.shape)
            if param not in ada_params:
                ada_params[param] = np.zeros_like(param_value)
            ada_params[param] += grad ** 2
            param_value -= rnn.learning_rate * grad / np.sqrt(ada_params[param] + 1e-8)

        smooth_loss = smooth_loss * 0.999 + loss * 0.001

        if update % 10000 == 0:
            print("Smooth loss at step {}: {}".format(update, smooth_loss))

        if update % 100000 == 0:
            # Seed the sample with the first character of the current window
            x0 = np.zeros((K, 1))
            x0[char_to_ind[X_chars[0]]] = 1
            Y_synthesized = synthesize(rnn, hprev, x0, 200)
            synthesized_seq = one_hot_seq_to_char_seq(Y_synthesized, ind_to_char)
            print("Synthesized text:\n", synthesized_seq)

        hprev = hnext
        e += seq_length

    return rnn
# train_rnn updates the parameter arrays of `rnn` in place, so the existing
# binding is kept rather than being overwritten with the call's result.
# The trailing semicolon suppresses any notebook echo of the return value.
train_rnn(rnn, book_data, char_to_ind, ind_to_char, n_epochs=7);

AttributeError: 'RNN2' object has no attribute 'seq_length'