In [1]:
import numpy as np

In [3]:
embeddingdim = 10
vocab = [char for char in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ-']
reverse_vocab = {x:y for y, x in enumerate(vocab)}
key = 15

In [4]:
print(len(vocab))

27


In [5]:
# embed_matrix = 1 - (np.random.rand(len(vocab), embeddingdim) * 2)

In [6]:
# print(embed_matrix)

In [7]:
print(reverse_vocab[' '])

KeyError: ' '

In [8]:
def encrypt(text):
    chars = [char for char in text]
    indxs = [(reverse_vocab[char] + key) % len(vocab) for char in chars]
    encrypted = ''.join([vocab[idx] for idx in indxs])
#     print(encrypted)
    return encrypted
    
    
encrypt('ABCDEFGHI')

'PQRSTUVWX'

In [9]:
import torch
import random

In [10]:
num_examples = 512
msg_length = 32
hidden_dim = 10

In [11]:
def str_to_tensor(str):
    return torch.tensor([reverse_vocab[idx] for idx in str])

In [12]:
def create_dataset(num_examples):
    dataset = []
    
    for x in range(num_examples):
        msg = ''.join([random.choice(vocab) for _ in range(msg_length)])
        msg_enc = encrypt(msg)
        
        orig_idx = [reverse_vocab[char] for char in msg]
        enc_idx = [reverse_vocab[char] for char in msg_enc]
        
        dataset.append([torch.tensor(enc_idx), torch.tensor(orig_idx)])
    
    return dataset

In [13]:
dataset = create_dataset(1)

In [14]:
dataset

[[tensor([ 9, 13, 18, 18,  4, 13,  8, 13, 15,  5, 13, 21, 22, 11, 23, 13,  6,  1,
           0, 15,  8,  9, 19,  7, 23, 10,  9, 25,  2, 18, 10, 10]),
  tensor([21, 25,  3,  3, 16, 25, 20, 25,  0, 17, 25,  6,  7, 23,  8, 25, 18, 13,
          12,  0, 20, 21,  4, 19,  8, 22, 21, 10, 14,  3, 22, 22])]]

In [15]:
embed = torch.nn.Embedding(len(vocab), embeddingdim)

In [16]:
lstm_in = torch.rand(40, 20, 5)

In [17]:
enc = torch.tensor([ 4, 4, 10,  2, 20, 11, 23,  9, 16,  9, 21, 12, 23, 26, 13, 19, 12,  1,
           8, 19,  0,  2, 13, 22, 26, 26, 16, 20,  2,  1, 23, 18])

In [39]:
bed = embed(torch.tensor([1,2,3]))
bed.shape

torch.Size([3, 10])

In [19]:
lstm = torch.nn.LSTM(embeddingdim, hidden_dim)

In [20]:
linear = torch.nn.Linear(hidden_dim, len(vocab))

In [21]:
loss_fn = torch.nn.CrossEntropyLoss()
softmax = torch.nn.functional.softmax

In [22]:
optimizer = torch.optim.Adam(list(embed.parameters()) + list(lstm.parameters())
                             + list(linear.parameters()), lr=0.001)


In [23]:
def zero_hidden():
    return (torch.zeros(1, 1, hidden_dim),
            torch.zeros(1, 1, hidden_dim))


In [24]:
num_epochs = 10

accuracies, max_accuracy = [], 0
for x in range(num_epochs):
    print('Epoch: {}'.format(x))
    for encrypted, original in create_dataset(num_examples):
        # encrypted.size() = [64]
        lstm_in = embed(encrypted)
        # lstm_in.size() = [64, 5]. This is a 2D tensor, but LSTM expects 
        # a 3D tensor. So we insert a fake dimension.
        lstm_in = lstm_in.unsqueeze(1)
        # lstm_in.size() = [64, 1, 5]
        # Get outputs from the LSTM.
        lstm_out, lstm_hidden = lstm(lstm_in, zero_hidden())
        # lstm_out.size() = [64, 1, 10]
        # Apply the affine transform.
        scores = linear(lstm_out)
        # scores.size() = [64, 1, 27], but loss_fn expects a tensor
        # of size [64, 27, 1]. So we switch the second and third dimensions.
        scores = scores.transpose(1, 2)
        # original.size() = [64], but original should also be a 2D tensor
        # of size [64, 1]. So we insert a fake dimension.
        original = original.unsqueeze(1)
        # Calculate loss.
        loss = loss_fn(scores, original) 
        # Backpropagate
        loss.backward()
        # Update weights
        optimizer.step()
    print('Loss: {:6.4f}'.format(loss.item()))


Epoch: 0
Loss: 0.4698
Epoch: 1
Loss: 0.0241
Epoch: 2
Loss: 0.0291
Epoch: 3
Loss: 0.0017
Epoch: 4
Loss: 0.0004
Epoch: 5
Loss: 0.0033
Epoch: 6
Loss: 0.0002
Epoch: 7
Loss: 0.0001
Epoch: 8
Loss: 0.0000
Epoch: 9
Loss: 0.0001


In [35]:
with torch.no_grad():
    string = 'WP--C'
    t = str_to_tensor(string)
    embedded = embed(t)
    embedded = embedded.unsqueeze(1)
    lstm_out, lsfm_hidden = lstm(embedded, zero_hidden())
    # print(lsfm_hidden)
    scores = linear(lstm_out)
    
    predictions = softmax(scores, dim=2)
    # print(predictions)
    x, bout = predictions.max(dim=2)
    bout = bout.squeeze(1)
#     print(bout)
    
    print(''.join(vocab[idx] for idx in bout))
    
#     print(lstm_out)
#     print(embedded)

print(string)

HALLO
WP--C


In [79]:
with torch.no_grad():
    matches, total = 0, 0
    for encrypted, original in create_dataset(num_examples):
        lstm_in = embed(encrypted)
        lstm_in = lstm_in.unsqueeze(1)
        lstm_out, lstm_hidden = lstm(lstm_in, zero_hidden())
        scores = linear(lstm_out)
        # Compute a softmax over the outputs
        predictions = softmax(scores, dim=2)
        # Choose the letter with the maximum probability
        _, batch_out = predictions.max(dim=2)
        # Remove fake dimension
        batch_out = batch_out.squeeze(1)
        # Calculate accuracy
        matches += torch.eq(batch_out, original).sum().item()
        total += torch.numel(batch_out)
    accuracy = matches / total
    print('Accuracy: {:4.2f}%'.format(accuracy * 100))


Accuracy: 99.86%
