## Import dependencies

In [5]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import os

In [6]:
with open('tragedias_shakespeare_filtered.txt') as file:
    text = file.read()

In [10]:
text[400:1000]

' con sus espadas.\nYo soy el primogénito del último\nque portó la diadema imperial de Roma;\ndejen pues que el honor de mi padre viva en mí,\ny no ultrajen mi mayorazgo con este agravio.\nBASIANO (A sus seguidores .)\nRomanos, amigos, seguidores,\ndefensores de mis derechos,\nsi alguna vez Basiano, hijo de César,\nfue visto con gracia por la real Roma,\ncuiden entonces este paso al Capitolio,\ny no padezcan el deshonor de acercarse\nal trono imperial, consagrado a la virtud,\nla justicia, la continencia, la nobleza;\ndejen que el mérito brille en elección transparente,\n\x0cy, romanos, peleen por su libertad de'

In [22]:
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

# encode the text
encoded = np.array([char2int[ch] for ch in text])

len(encoded)

1410105

In [27]:
def one_hot_encode(arr, n_labels):

    one_hot_array = np.zeros((np.multiply(*arr.shape),n_labels),dtype=np.float32)

    one_hot_array[np.arange(one_hot_array.shape[0]),arr.flatten()] = 1.

    one_hot_array = one_hot_array.reshape((*arr.shape, n_labels))

    return one_hot_array

In [31]:


def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.

       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''

    batch_size_total = batch_size * seq_length
    # total number of batches we can make, // integer division, round down
    n_batches = len(arr)//batch_size_total

    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size_total]
    # Reshape into batch_size rows, n. of first row is the batch size, the other lenght is inferred
    arr = arr.reshape((batch_size, -1))

    # iterate through the array, one sequence at a time
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:, n:n+seq_length]
        # The targets, shifted by one
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [34]:
batches = get_batches(encoded, 8, 50)
x, y = next(batches)

print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[61 36 26 25 72 36  8 15 14 68]
 [45  8 82 45 39 70  7 82 37 82]
 [37  8 88  7 84 44 63 82  7 37]
 [45 39  8 46 37 39  8 70 37 39]
 [71 56 19 46 37 35 37 54  8 37]
 [52 48 63  8 54 83 45 82 35 37]
 [44 37 46 45  8 84 33 35 63  8]
 [45  8 37 34 83 59 71 56 72 69]]

y
 [[36 26 25 72 36  8 15 14 68 51]
 [ 8 82 45 39 70  7 82 37 82 43]
 [ 8 88  7 84 44 63 82  7 37 24]
 [39  8 46 37 39  8 70 37 39 37]
 [56 19 46 37 35 37 54  8 37 46]
 [48 63  8 54 83 45 82 35 37 39]
 [37 46 45  8 84 33 35 63  8 44]
 [ 8 37 34 83 59 71 56 72 69 44]]


In [35]:
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else:
    print('No GPU available, training on CPU; consider making n_epochs very small.')

No GPU available, training on CPU; consider making n_epochs very small.
