In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import string
from string import ascii_letters

In [2]:
def caesar_cipher(text, shift):
    alphabet_lower = string.ascii_lowercase + " "
    alphabet_upper = string.ascii_uppercase
    shifted_alphabet_lower = alphabet_lower[shift:] + alphabet_lower[:shift]
    shifted_alphabet_upper = alphabet_upper[shift:] + alphabet_upper[:shift]
    return "".join(
        (
            shifted_alphabet_lower[alphabet_lower.index(c)]
            if c in alphabet_lower
            else shifted_alphabet_upper[alphabet_upper.index(c)]
        )
        for c in text
    )

In [3]:
text = "The quick brown fox jumps over the lazy dog. This sentence contains all the letters in the English alphabet. Artificial intelligence is transforming industries worldwide. Machine learning models require large datasets for training. The sun rises in the east and sets in the west. Python is a popular programming language for data science. History repeats itself, but each time the price goes up. Innovation distinguishes between a leader and a follower. The only way to do great work is to love what you do. Life is what happens when you are busy making other plans. Science is organized knowledge; wisdom is organized life. Water covers approximately 71% of the Earths surface. The human brain has about 86 billion neurons. Quantum computing promises to revolutionize cryptography. Renewable energy sources are critical for sustainability. The Internet of Things connects everyday devices to the cloud.Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam euismod odio at libero volutpat, id fermentum nisi finibus. Curabitur quis enim vel turpis malesuada tincidunt. Sed non neque ut erat aliquam rhoncus. Fusce in lectus id mi tempus aliquet."

In [4]:
len(text)

1155

In [5]:
preprocessed_text = [simb for simb in text if simb in (ascii_letters + " ")]

In [6]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=3):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(
            hidden_size, hidden_size, num_layers=num_layers, batch_first=True
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out)
        return out, hidden

    def init_hidden(self, batch_size):
        return torch.randn(self.num_layers, batch_size, self.hidden_size)

In [7]:
def prepare_data(num_samples, max_length):
    X, y = [], []
    alphabet = string.ascii_letters + " "

    for _ in range(num_samples):
        idx = random.randint(0, len(preprocessed_text) - 1)
        length = random.randint(5, max_length)
        text = "".join(
            preprocessed_text[idx : min(idx + length, len(preprocessed_text))]
        )
        shift = 5

        encrypted = caesar_cipher(text, shift)
        x_seq = [alphabet.index(c) for c in encrypted]
        y_seq = [alphabet.index(c) for c in text]

        X.append(x_seq)
        y.append(y_seq)

    return X, y

def prepare_train(X, y, max_length):
    X_padded = torch.zeros((len(X), max_length), dtype=torch.long)
    y_padded = torch.zeros((len(y), max_length), dtype=torch.long)

    for i, (x_seq, y_seq) in enumerate(zip(X, y)):
        length = len(x_seq)
        X_padded[i, :length] = torch.tensor(x_seq)
        y_padded[i, :length] = torch.tensor(y_seq)

    return X_padded, y_padded

In [8]:
def train_model(model, X, y, epochs, batch_size, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for i in range(0, len(X), batch_size):
            batch_X = X[i : i + batch_size].to(device)
            batch_y = y[i : i + batch_size].to(device)

            hidden = model.init_hidden(batch_X.size(0)).to(device)
            optimizer.zero_grad()
            outputs, hidden = model(batch_X, hidden)

            loss = criterion(outputs.view(-1, 53), batch_y.view(-1))

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(
            f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / (len(X) // batch_size):.4f}"
        )

In [23]:
n_samples = 100
max_len = 20
hidden_size = 128
epochs = 20
batch_size = 32


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

X, y = prepare_data(n_samples, max_len)
X_tensor, y_tensor = prepare_train(X, y, max_len)
X_tensor.size()

cuda


torch.Size([100, 20])

In [13]:
model = RNN(
    input_size=53, hidden_size=hidden_size, output_size=53, num_layers=5
).to(device)
train_model(model, X_tensor, y_tensor, epochs, batch_size, device)

Epoch 1/20, Loss: 4.7002
Epoch 2/20, Loss: 3.6164
Epoch 3/20, Loss: 3.1977
Epoch 4/20, Loss: 2.8406
Epoch 5/20, Loss: 2.5721
Epoch 6/20, Loss: 2.3378
Epoch 7/20, Loss: 2.1393
Epoch 8/20, Loss: 1.9438
Epoch 9/20, Loss: 1.7482
Epoch 10/20, Loss: 1.5839
Epoch 11/20, Loss: 1.4108
Epoch 12/20, Loss: 1.2638
Epoch 13/20, Loss: 1.1154
Epoch 14/20, Loss: 0.9918
Epoch 15/20, Loss: 0.8708
Epoch 16/20, Loss: 0.7674
Epoch 17/20, Loss: 0.6684
Epoch 18/20, Loss: 0.5813
Epoch 19/20, Loss: 0.5120
Epoch 20/20, Loss: 0.4391


In [14]:
def decrypt_text(model, encrypted_text, max_length, device):
    alphabet = string.ascii_letters + " "
    model.eval()

    x_seq = [alphabet.index(c) for c in encrypted_text if c in alphabet]
    x_tensor = torch.zeros((1, max_length), dtype=torch.long).to(device)
    x_tensor[0, : len(x_seq)] = torch.tensor(x_seq)

    with torch.no_grad():
        hidden = model.init_hidden(1).to(device)
        outputs, _ = model(x_tensor, hidden)
        _, predicted = torch.max(outputs, dim=2)

    decrypted = "".join(alphabet[idx] for idx in predicted[0][: len(x_seq)])
    return decrypted

In [18]:
test_text = "test rnn infer"
shift = 5
encrypted = caesar_cipher(test_text, shift)
print(f"Оригинальный текст: {test_text}")
print(f"Зашифрованный текст: {encrypted}")
decrypted = decrypt_text(model, encrypted, max_len, device)
print(f"Дешифрованный текст: {decrypted}")

Оригинальный текст: test rnn infer
Зашифрованный текст: yjxyewssenskjw
Дешифрованный текст: test rnn inger
