In [10]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [11]:
class CharacterRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(CharacterRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # Define the RNN layer
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # Output layer
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, hidden):
        # Forward pass through RNN
        out, hidden = self.rnn(x, hidden)
        # Pass through final layer
        out = self.fc(out)
        return out, hidden
    
    def init_hidden(self, batch_size):
        # Initialize hidden state
        return torch.zeros(self.num_layers, batch_size, self.hidden_size)

In [12]:
class TextGenerator:
    def __init__(self, text, sequence_length=6):
        self.text = text
        self.sequence_length = sequence_length
        self.chars = sorted(list(set(text)))
        self.char_to_idx = {char: i for i, char in enumerate(self.chars)}
        self.idx_to_char = {i: char for i, char in enumerate(self.chars)}
        self.vocab_size = len(self.chars)
    
    def prepare_data(self):
        """Create training sequences and corresponding targets"""
        sequences = []
        next_chars = []
        
        for i in range(0, len(self.text) - self.sequence_length):
            sequences.append(self.text[i:i + self.sequence_length])
            next_chars.append(self.text[i + self.sequence_length])
        
        # Convert to one-hot encoded vectors
        X = np.zeros((len(sequences), self.sequence_length, self.vocab_size))
        y = np.zeros((len(sequences), self.vocab_size))
        
        for i, sequence in enumerate(sequences):
            for t, char in enumerate(sequence):
                X[i, t, self.char_to_idx[char]] = 1
            y[i, self.char_to_idx[next_chars[i]]] = 1
        
        return torch.FloatTensor(X), torch.FloatTensor(y)
    
    def train_model(self, hidden_size=128, num_layers=1, num_epochs=100, batch_size=128, learning_rate=0.001):
        """Train the RNN model"""
        X, y = self.prepare_data()
        
        model = CharacterRNN(self.vocab_size, hidden_size, self.vocab_size, num_layers)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        
        # Training loop
        for epoch in range(num_epochs):
            model.zero_grad()
            hidden = model.init_hidden(X.size(0))
            
            output, hidden = model(X, hidden)
            loss = criterion(output[:, -1, :], torch.max(y, 1)[1])
            
            loss.backward()
            optimizer.step()
            
            if (epoch + 1) % 10 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        
        return model
    
    def generate_text(self, model, seed_text, length=100, temperature=0.5):
        """Generate text using the trained model with fixed length seed text"""
        # Проверка длины seed_text
        if len(seed_text) != self.sequence_length:
            raise ValueError(f"Seed text must be exactly {self.sequence_length} characters long")
        
        # Проверка символов в словаре
        for char in seed_text:
            if char not in self.char_to_idx:
                raise ValueError(f"Character '{char}' not found in training vocabulary")
        
        model.eval()
        current_sequence = seed_text
        generated_text = seed_text
        
        # Generate new characters
        for _ in range(length):
            # Prepare input
            x = np.zeros((1, self.sequence_length, self.vocab_size))
            for t, char in enumerate(current_sequence):
                x[0, t, self.char_to_idx[char]] = 1
            x = torch.FloatTensor(x)
            
            # Forward pass
            hidden = model.init_hidden(1)
            output, hidden = model(x, hidden)
            
            # Apply temperature and get probabilities
            output = output[0, -1, :] / temperature
            probs = torch.softmax(output, dim=0).detach().numpy()
            
            # Sample next character
            next_char_idx = np.random.choice(len(probs), p=probs)
            next_char = self.idx_to_char[next_char_idx]
            
            # Update sequences
            generated_text += next_char
            current_sequence = current_sequence[1:] + next_char
        
        return generated_text



In [22]:
# Инициализация с обучающим текстом
with open('text.txt', 'r', encoding='utf-8') as file:
    sample_text = file.read()

# Создание и обучение модели
generator = TextGenerator(sample_text, sequence_length=6)
model = generator.train_model(num_epochs=1000)

Epoch [10/1000], Loss: 4.5528
Epoch [20/1000], Loss: 3.9905
Epoch [30/1000], Loss: 3.9403
Epoch [40/1000], Loss: 3.9174
Epoch [50/1000], Loss: 3.9011
Epoch [60/1000], Loss: 3.8769
Epoch [70/1000], Loss: 3.7958
Epoch [80/1000], Loss: 3.6049
Epoch [90/1000], Loss: 3.5251
Epoch [100/1000], Loss: 3.4818
Epoch [110/1000], Loss: 3.4413
Epoch [120/1000], Loss: 3.3979
Epoch [130/1000], Loss: 3.3480
Epoch [140/1000], Loss: 3.2921
Epoch [150/1000], Loss: 3.2316
Epoch [160/1000], Loss: 3.1674
Epoch [170/1000], Loss: 3.0964
Epoch [180/1000], Loss: 3.0195
Epoch [190/1000], Loss: 2.9385
Epoch [200/1000], Loss: 2.8551
Epoch [210/1000], Loss: 2.7690
Epoch [220/1000], Loss: 2.6820
Epoch [230/1000], Loss: 2.5962
Epoch [240/1000], Loss: 2.5145
Epoch [250/1000], Loss: 2.4377
Epoch [260/1000], Loss: 2.3653
Epoch [270/1000], Loss: 2.2979
Epoch [280/1000], Loss: 2.2348
Epoch [290/1000], Loss: 2.1759
Epoch [300/1000], Loss: 2.1207
Epoch [310/1000], Loss: 2.0696
Epoch [320/1000], Loss: 2.0206
Epoch [330/1000],

In [25]:
# Правильное использование: seed_text ровно 6 символов
seed_text = "button"  # 6 символов
generated = generator.generate_text(model, seed_text, length=100)
print(f"\nСгенерированный текст:\n{generated}")


Сгенерированный текст:
button onClick={toHtml:

// src/TextEditorApi = {
  p..

    return currentState((currentState. seCSmate =


In [None]:
# латентное представление
# латентное пространство