In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
%%capture
# One seed shared by both random number generators used in this notebook:
# NumPy picks the snippet start positions, PyTorch initialises the weights and
# samples characters during generation.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

In [3]:
# Vocabulary of the model. The position of a character in this string is the
# integer id used for it (char2idx / idx2char are built by enumerating CHARS).
# Layout: digits, lowercase letters, uppercase letters, ASCII punctuation with
# an em dash inserted after ';', then the whitespace characters.
# NOTE(review): the order presumably has to stay exactly as-is for the
# pretrained checkpoint to keep decoding correctly - confirm before editing.
CHARS = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" \
        + "!\"#$%&\'()*+,-./:;—<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c"

# Read the training text into a flat list of single characters.
corpus = []
# The encoding is stated explicitly: without it open() falls back to the
# platform locale, which can mis-decode (or fail on) non-ASCII characters such
# as the em dash that is part of CHARS.
with open('shakespeare.txt', 'r', encoding='utf-8') as f:
    for line in f:
        # strip() removes leading/trailing whitespace, including the line
        # terminator; exactly one '\n' is then put back per line.
        corpus.extend(line.strip())
        corpus.append('\n')

print("Total number of characters:", len(corpus))
print("\n\n")
print("First 100 characters:\n")
print(corpus[:100])

Total number of characters: 1115390



First 100 characters:

['F', 'i', 'r', 's', 't', ' ', 'C', 'i', 't', 'i', 'z', 'e', 'n', ':', '\n', 'B', 'e', 'f', 'o', 'r', 'e', ' ', 'w', 'e', ' ', 'p', 'r', 'o', 'c', 'e', 'e', 'd', ' ', 'a', 'n', 'y', ' ', 'f', 'u', 'r', 't', 'h', 'e', 'r', ',', ' ', 'h', 'e', 'a', 'r', ' ', 'm', 'e', ' ', 's', 'p', 'e', 'a', 'k', '.', '\n', '\n', 'A', 'l', 'l', ':', '\n', 'S', 'p', 'e', 'a', 'k', ',', ' ', 's', 'p', 'e', 'a', 'k', '.', '\n', '\n', 'F', 'i', 'r', 's', 't', ' ', 'C', 'i', 't', 'i', 'z', 'e', 'n', ':', '\n', 'Y', 'o', 'u']


In [4]:
# Two-way lookup between characters and their integer ids; the id of a
# character is its position in CHARS.
idx2char = dict(enumerate(CHARS))
char2idx = {symbol: position for position, symbol in idx2char.items()}

# Vocabulary size, used to size the embedding table and the output layer.
NUM_CHARS = len(char2idx)
print("Total number of distinct chars:", NUM_CHARS)

Total number of distinct chars: 101


In [5]:
# Encode the whole corpus as vocabulary ids.
corpus_with_indices = [char2idx[symbol] for symbol in corpus]

print("Corpus with indices:")
print(corpus_with_indices[:100])

# Build 2000 training pairs. Each pair comes from one random window of
# SIZE_OF_SNIPPET consecutive ids: the input is the window without its last
# id, the target is the same window shifted one position to the left, so the
# target at step t is the character that follows the input at step t.
SIZE_OF_SNIPPET = 250
dataset = []
for _ in range(2000):

    begin = np.random.randint(0, len(corpus_with_indices) - SIZE_OF_SNIPPET)
    window = corpus_with_indices[begin:begin + SIZE_OF_SNIPPET]

    inputs = torch.LongTensor(window[:-1])
    targets = torch.LongTensor(window[1:])
    dataset.append((inputs, targets))

print("\nSize of dataset:", len(dataset))

# Batch-major tensors of shape (number of snippets, SIZE_OF_SNIPPET - 1).
all_inputs, all_targets = zip(*dataset)
X = torch.stack(all_inputs)
Y = torch.stack(all_targets)

Corpus with indices:
[41, 18, 27, 28, 29, 95, 38, 18, 29, 18, 35, 14, 23, 77, 97, 37, 14, 15, 24, 27, 14, 95, 32, 14, 95, 25, 27, 24, 12, 14, 14, 13, 95, 10, 23, 34, 95, 15, 30, 27, 29, 17, 14, 27, 73, 95, 17, 14, 10, 27, 95, 22, 14, 95, 28, 25, 14, 10, 20, 75, 97, 97, 36, 21, 21, 77, 97, 54, 25, 14, 10, 20, 73, 95, 28, 25, 14, 10, 20, 75, 97, 97, 41, 18, 27, 28, 29, 95, 38, 18, 29, 18, 35, 14, 23, 77, 97, 60, 24, 30]

Size of dataset: 2000


In [6]:
class ShakespeareGenerator(nn.Module):
    """Character-level language model: embedding -> single-layer LSTM -> linear.

    The submodules are deliberately named ``embedding``, ``lstm`` and
    ``linear``: these names are the prefixes of the ``state_dict`` keys, so
    renaming them would make existing checkpoints fail to load.

    Args:
        embedding_size: dimensionality of the character embeddings.
        hidden_size: number of hidden units of the LSTM.
        num_chars: vocabulary size. Defaults to the notebook-level
            ``NUM_CHARS`` when omitted.
    """

    def __init__(self, embedding_size, hidden_size, num_chars=None):

        super().__init__()

        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_chars = NUM_CHARS if num_chars is None else num_chars

        self.embedding = nn.Embedding(
            num_embeddings=self.num_chars,
            embedding_dim=self.embedding_size
        )
        self.lstm = nn.LSTM(
            input_size=self.embedding_size,
            hidden_size=self.hidden_size
        )
        self.linear = nn.Linear(
            in_features=self.hidden_size,
            out_features=self.num_chars
        )


    def forward(self, batched_inputs):
        """Compute next-character logits for a batch of sequences.

        Args:
            batched_inputs: integer tensor of character ids laid out
                time-major, i.e. with shape (seq_len, batch_size).

        Returns:
            A pair ``(outputs, (h, c))`` where ``outputs`` holds the logits
            with shape (seq_len, batch_size, num_chars) and ``(h, c)`` is the
            final LSTM state.
        """

        batch_size = batched_inputs.shape[1]
        h, c = self.get_initial_hc(batch_size)
        seq_len = batched_inputs.shape[0]

        embeddings = self.embedding(batched_inputs)
        outputs, (h, c) = self.lstm(
                embeddings.reshape(seq_len, batch_size, self.embedding_size),
                (h, c)
        )
        # No squeeze here: squeezing silently dropped the batch (or time) axis
        # whenever it had size 1, which changed the output rank and broke
        # callers that transpose axes 0 and 1.
        outputs = self.linear(outputs)

        return outputs, (h, c)


    def get_initial_hc(self, batch_size):
        """Return zero-filled initial ``(h, c)`` states for ``batch_size`` sequences.

        Each state has shape (1, batch_size, hidden_size) and is created with
        the dtype and device of the model weights, so the model keeps working
        after being moved to another device.
        """

        state_shape = (1, batch_size, self.hidden_size)
        reference = self.linear.weight
        return (reference.new_zeros(state_shape),
                reference.new_zeros(state_shape))


    def generate(self, initial_token=' ', num_tokens=100, temperature=1):
        """Sample text from the model one character at a time.

        Args:
            initial_token: character fed to the model first. It seeds the
                generation and is not part of the returned text.
            num_tokens: number of characters to generate.
            temperature: positive scaling applied to the logits before
                sampling; values below 1 sharpen the distribution, values
                above 1 flatten it.

        Returns:
            A string of exactly ``num_tokens`` sampled characters.

        Raises:
            ValueError: if ``temperature`` is not strictly positive.
            KeyError: if ``initial_token`` is not in the vocabulary.
        """

        if temperature <= 0:
            raise ValueError("temperature must be strictly positive")

        device = self.linear.weight.device

        with torch.no_grad():

            token = torch.tensor(
                [char2idx[initial_token]], dtype=torch.long, device=device
            )
            h, c = self.get_initial_hc(1)
            chars = []

            for _ in range(num_tokens):

                inp = self.embedding(token)
                out, (h, c) = self.lstm(inp.reshape(1, 1, self.embedding_size), (h, c))
                logits = self.linear(out.reshape(1, -1)).view(-1)
                # softmax is the overflow-safe form of exp(logits / T): a raw
                # exp() can reach inf at low temperatures, which multinomial
                # rejects.
                probs = torch.softmax(logits / temperature, dim=0)
                # Shape (1,), already a long tensor on the right device.
                token = torch.multinomial(probs, 1)
                # Record the sampled character right away so that every
                # sampling step contributes to the result.
                chars.append(idx2char[token.item()])

            return ''.join(chars)


In [7]:
# Hyper-parameters.
EPOCHS = 500           # number of full-batch gradient steps
LR = 0.1               # SGD learning rate
BETA = 0.8             # SGD momentum coefficient
EMBEDDING_SIZE = 100
HIDDEN_SIZE = 64

# True: restore the weights from 'shakespeare.pt'. False: train from scratch.
USE_PRETRAINED = True

net = ShakespeareGenerator(EMBEDDING_SIZE, HIDDEN_SIZE).float()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=BETA)

if not USE_PRETRAINED:
    for _epoch in range(EPOCHS):

        # The network takes time-major input, while X is stored batch-major.
        output, _hidden = net(X.transpose(0, 1))
        # Back to batch-major so the flattened logits line up with Y.
        output = output.transpose(0, 1)

        loss = criterion(output.reshape(-1, NUM_CHARS), Y.reshape(-1))

        print(loss.item())
        net.zero_grad()
        loss.backward()
        optimizer.step()

else:
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    # The map_location callable returns every storage unchanged instead of
    # moving it to the device it was saved from.
    net.load_state_dict(torch.load('shakespeare.pt', map_location=lambda storage, loc: storage))
    

In [8]:
# Baseline sample: with temperature 1 the logits are sampled unscaled.
sample_text = net.generate(num_tokens=1000, temperature=1)
print(sample_text)

Trand the dight, of can and to't not Stere.

DUKE VINCENTIO:
Am ill shall of pryrider:
Come he shall ams most whence he suffy most,
Apbante,
Which father, thou two with onge a speading to breagued hersels tougibeariet you had his thredly your good me; and ontage
Thou en me rade?

KING HENRY VI:
Brore sized, guer.

AUFERON:
And and it a sustion burston give visinent:
KING
Spest no more might powion, that aquers sometance?
Nawres.
As the distrucicitse: goilt a feards, mispoice, sold of mustage is corare Claudw'cared
How deiple.

PETRUCHIO:
What of above are the wothing: thing steen,
And Riunts you.

Sthear a suckem; end desuctaitle
Besest jeart man some the pray thelace,
whither no true?
Have entaither by histing thing in gendlonequ: mill nep if do to can desomelake conqonnow though your feence calient, he no do see again and huddongy now I that you conjest folke
faress, pown. I dust this night, Eranse of to more reven aleled.

CLAUDIO:
Thighth all, you untime be wheed
With him aled can


In [9]:
# Higher temperature: dividing the logits by 1.5 flattens the distribution,
# so unlikely characters are drawn more often.
sample_text = net.generate(num_tokens=1000, temperature=1.5)
print(sample_text)

kinyow, to Fixtgate-berder? Citer lit: you I,
whattincior,
If peo whou,
Do

EDWARD:
Grth
acly him;
The am-end scieve
My
ar orn
And him it meought, no;
Heep give yet my Defid, bid!

LART:
O irer.
Lorjont, never.

ATlurs,
Some, my remajedancars
Yethiniable;
Craturn'd?

CAMELLERSE:
I be ratdech'ces you a from that;
Of vientomedfore give, cals.

Thim;
Tno lappal-what if like you, heixte'll shrivin; that this are dist!
Mares Kience arrably.

JULIETER:
Meen,
Plaught break, Stquecy-paw'd natue he mire, his clantothan? gell fulm tiscie; more now and Nocry this lordsain pady jourtageferther
blobbe
Thenir,--
OLvar'd what of my vasase 'twurim! Ay.

CAMILLO:
Razpanning by?
They pargait wea,
Go; nigh! solet'cain. Leisely unted?
 meys?
Come Tydol.

YORY ANE VISAN:
'Blo scoing,
YORK:
For' gitifors, this theix thir's exace.
Sham sigong over tigant:
Let taxtiwould
'Tibl EpSHN PORD:
hat's gix yange,' forn aking?
Nuceld:
Hould, toward,
Have motewared.
Co lold?
Naye good ceptal dust;
Renttedw's viorake, 


In [10]:
# Lower temperature: dividing the logits by 0.25 sharpens the distribution,
# so the most likely characters dominate the sample.
sample_text = net.generate(num_tokens=1000, temperature=0.25)
print(sample_text)

the sea the seem the father shall his with the some shall the shall make the prick the see the death the death and the sea the son of the love the man and the so man to the should shall me the sea the father the live the son the father be the see the so be my countresseld the sent the lord, the sight a shall the sea the be a see the hour the say the shall the shall to the shall you have here the see the sea the see the so man the soon the sometant the strent the man the so for my counted the some the lords the so man the be me the for the shall the so man the see the so father the live the so man the death the stain my lord, the shall the sea the live the countere the pery come the should stand the death shall be the death the so shall the see shall be my so fair me the son the so should the sea the dear the sea the seemence the shall the death as the shall the seem the not the see the sease the grace the heart he have the so disting the so man the heart the seeman the come the sea th
