In [13]:
import torch
import torch.nn as nn
import numpy as np
import string
import urllib.request
import re

# pobranie pliku tekstowego z tekstami szekspirowskimi
url = 'https://homl.info/shakespeare'
response = urllib.request.urlopen(url)
data = response.read().decode('utf-8')

# czyszczenie tekstu ze zbędnych znaków
data = re.sub(r'\n', ' ', data)
data = re.sub(r'[\t]+', ' ', data)
data = re.sub(r'[^\x00-\x7F]+', '', data)
data = data.translate(str.maketrans('', '', string.punctuation))

# utworzenie słownika znaków
chars = list(set(data))
char_to_int = {ch:i for i, ch in enumerate(chars)}
int_to_char = {i:ch for i, ch in enumerate(chars)}

# podział danych na wejście i wyjście
max_length = 100
stride = 3
inputs = []
outputs = []
for i in range(0, len(data) - max_length, stride):
    inputs.append(data[i:i+max_length])
    outputs.append(data[i+max_length])

# zamiana danych na tensorowe tablice numpy
X = np.zeros((len(inputs), max_length, len(chars)), dtype=np.float32)
y = np.zeros((len(inputs), max_length), dtype=np.int64)
for i, input_text in enumerate(inputs):
    for j, ch in enumerate(input_text):
        X[i, j, char_to_int[ch]] = 1
        y[i, j] = char_to_int[outputs[i][j]]

# implementacja modelu Char-RNN w PyTorch

    
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = nn.Dropout(dropout)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, input_size)
    
    def forward(self, x, hidden):
        out, hidden = self.lstm(x, hidden)
        out = self.dropout(out)
        out = self.fc(out)
        return out, hidden
    
    def init_hidden(self, batch_size):
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device),
                torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device))


# definicja hiperparametrów
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
batch_size = 128
learning_rate = 0.01
num_epochs = 20
num_layers = 2
dropout = 0.2


# utworzenie instancji modelu Char-RNN
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CharRNN(input_size, hidden_size, num_layers, dropout)
model.to(device)

IndexError: string index out of range

In [12]:
# utworzenie funkcji straty i optymalizatora
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# trening modelu
for epoch in range(num_epochs):
    loss = 0
    hidden = model.init_hidden(batch_size)
    for i in range(0, X.shape[0], batch_size):
        if i + batch_size > X.shape[0]:
            continue
        inputs = torch.from_numpy(X[i:i+batch_size]).to(device)
        targets = torch.from_numpy(y[i:i+batch_size]).to(device)
        model.zero_grad()
        hidden = (hidden[0].detach(), hidden[1].detach())
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs.transpose(1, 2), targets.view(batch_size, -1))
        loss.backward()
        optimizer.step()
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

# funkcja do generowania tekstu na podstawie modelu
def generate_text(model, start_text, length=100):
    with torch.no_grad():
        hidden = model.init_hidden(1)
        input = torch.zeros((1, max_length, input_size))
        input[0, :len(start_text), :] = torch.Tensor([char_to_int[ch] for ch in start_text])
        output = start_text
        for i in range(length):
            output_probs, hidden = model(input.to(device), hidden)
            output_probs = output_probs[-1].squeeze()
            output_probs = torch.softmax(output_probs, dim=0).cpu().numpy()
            selected = np.random.choice(len(output_probs), p=output_probs)
            selected_char = int_to_char[selected]
            output += selected_char
            input = torch.roll(input, -1, dims=1)
            input[0, -1, :] = torch.zeros((input_size,))
            input[0, -1, selected] = 1
        return output

# wygenerowanie tekstu na podstawie modelu
start_text = 'To be or not to be'
generated_text = generate_text(model, start_text, length=100)
print(start_text + generated_text)

RuntimeError: Expected target size [128, 100], got [128, 54]

In [None]:
# funkcja czatbota
def chatbot():
    print('Hello! I am Shakespeare Bot. Ask me anything about Shakespeare!')
    while True:
        user_input = input('> ')
        if user_input.lower() in ['bye', 'goodbye']:
            print('Goodbye!')
            break
        generated_text = generate_text(model, user_input, length=100)
        print(generated_text)