In [19]:
import torch
import torch.nn as nn
import torch.optim as optim

In [95]:
# Read the complete training corpus into memory as a single string.
# NOTE(review): open() is called without an explicit encoding, so the platform
# default applies, and the path is an absolute, machine-specific location.
corpus_path = r'C:\Users\310\Desktop\Progects_Py\data\studying-stuff_data\character_sequence_2.txt'
with open(corpus_path, 'r') as file:
    text = file.read()

# Report the corpus size in characters
print(len(text))

2108


In [21]:
# Character vocabulary: every distinct character of the corpus, in sorted order
chars = sorted(set(text))

# Lookup tables in both directions (character -> id and id -> character);
# a character's id is its position in the sorted vocabulary
char_to_idx = {symbol: position for position, symbol in enumerate(chars)}
idx_to_char = dict(enumerate(chars))

# Number of distinct characters
vocab_size = len(chars)
print("Vocabulary Size:", vocab_size)

# The corpus re-expressed as a list of integer ids, one per character
encoded_text = list(map(char_to_idx.__getitem__, text))

Vocabulary Size: 37


In [22]:
# Fraction of the encoded corpus that goes to training; the remaining tail
# is held out for testing
train_ratio = 0.8
split_index = int(train_ratio * len(encoded_text))

# Contiguous split: everything before split_index trains, the rest tests
train_text, test_text = encoded_text[:split_index], encoded_text[split_index:]

In [96]:
# Sliding-window configuration
sequence_length = 74  # number of characters fed to the model per sample
batch_size = 16

# Every window of `sequence_length` consecutive ids is one input; the id that
# immediately follows the window is its prediction target.
train_starts = range(len(train_text) - sequence_length)
train_sequences = [train_text[start:start + sequence_length] for start in train_starts]
train_targets = [train_text[start + sequence_length] for start in train_starts]

# Integer tensors: inputs are (num_windows, sequence_length), targets (num_windows,)
X_train = torch.tensor(train_sequences, dtype=torch.long)
y_train = torch.tensor(train_targets, dtype=torch.long)

In [97]:
# Held-out windows, built the same way as the training ones: each run of
# `sequence_length` ids is an input and the id right after it is the target.
test_starts = range(len(test_text) - sequence_length)
test_sequences = [test_text[start:start + sequence_length] for start in test_starts]
test_targets = [test_text[start + sequence_length] for start in test_starts]

# Integer tensors for the evaluation inputs and targets
X_test = torch.tensor(test_sequences, dtype=torch.long)
y_test = torch.tensor(test_targets, dtype=torch.long)

In [98]:
from torch.utils.data import DataLoader, TensorDataset

# Pair each input window with its target character, then wrap the pairs in
# mini-batch loaders. Training batches are shuffled; test batches keep the
# order in which the windows were built.
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

test_data = TensorDataset(X_test, y_test)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

In [99]:
# Define the GRU-based model
class GRUModel(nn.Module):
    """Character-level next-token predictor: Embedding -> GRU -> Dropout -> Linear.

    Args:
        input_size: Width of the vectors fed to the GRU. The embedding layer
            produces vectors of exactly this width, so it doubles as the
            embedding dimension.
        hidden_size: Number of features in the GRU hidden state.
        output_size: Number of classes (logits) produced per sample.
        num_layers: Number of stacked GRU layers.
        dropout_rate: Dropout probability applied to the GRU output, and
            between GRU layers when ``num_layers > 1``.
        vocab_size: Number of distinct input ids accepted by the embedding.
            Defaults to ``output_size``, i.e. the model reads and predicts
            symbols from the same vocabulary.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.3,
                 vocab_size=None):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout_rate = dropout_rate

        # Size the embedding from the constructor arguments rather than from
        # module-level globals, so the class works wherever it is instantiated.
        if vocab_size is None:
            vocab_size = output_size

        # Embedding layer to convert character indices into dense vectors;
        # its width must equal the GRU's input width.
        self.embedding = nn.Embedding(vocab_size, input_size)

        # GRU layer (inter-layer dropout only exists for stacked GRUs)
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True,
                          dropout=dropout_rate if num_layers > 1 else 0)

        # Dropout layer before the fully connected layer
        self.dropout = nn.Dropout(dropout_rate)

        # Fully connected layer for output
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return logits for the element following each input sequence.

        Args:
            x: Integer tensor of token ids with shape (batch_size, sequence_length).

        Returns:
            Tensor of shape (batch_size, output_size) with unnormalised scores.
        """
        # (batch_size, sequence_length) -> (batch_size, sequence_length, input_size)
        x = self.embedding(x)

        # Zero initial hidden state: (num_layers, batch_size, hidden_size),
        # created directly on the input's device and in its dtype.
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)

        # Forward propagate GRU with embedded input
        out, _ = self.gru(x, h0)

        # Apply dropout to the GRU output before the fully connected layer
        out = self.dropout(out)

        # Only the last time step's output is used for the prediction
        out = self.fc(out[:, -1, :])

        return out
    
# Hyper-parameters
embedding_dim = 60
hidden_size = 128
output_size = vocab_size  # one output class per unique character
num_layers = 2
dropout_rate = 0

# Model: the GRU consumes the embedded characters, so its input width is the
# embedding dimension
model = GRUModel(
    input_size=embedding_dim,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    dropout_rate=dropout_rate,
)

# Cross-entropy over character classes, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [69]:
# Running minima of the epoch-average losses. Everything starts at +infinity
# so that the first measured loss always becomes the new minimum.
best_train_loss = best_test_loss = float('inf')

# Companion "current" minima for the train and test loss, initialised the same way
current_best_train_loss = current_best_test_loss = float('inf')

In [100]:
num_epochs = 100

# NOTE(review): best_train_loss / best_test_loss are initialised in a separate
# cell and are not reset here, so they keep their values when this cell is
# executed again. Re-run the initialisation cell first if the reported minima
# should cover only this run.
for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    epoch_train_loss = 0.0   # sum of per-sample training losses for this epoch
    train_samples = 0        # number of training samples seen this epoch

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # The criterion reports the mean over the batch; weight it by the
        # batch size so a shorter final batch is not over-represented in the
        # epoch average.
        batch_count = y_batch.size(0)
        epoch_train_loss += loss.item() * batch_count
        train_samples += batch_count

    # Per-sample average training loss for this epoch
    avg_train_loss = epoch_train_loss / train_samples

    # Update the overall best train loss
    best_train_loss = min(best_train_loss, avg_train_loss)

    # ---- Evaluation phase ----
    model.eval()
    test_loss = 0.0    # sum of per-sample test losses for this epoch
    test_samples = 0   # number of test samples seen this epoch
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)

            # Same size-weighted accumulation as in the training phase
            batch_count = y_batch.size(0)
            test_loss += loss.item() * batch_count
            test_samples += batch_count

    # Per-sample average test loss for this epoch
    avg_test_loss = test_loss / test_samples

    # Update the overall best test loss
    best_test_loss = min(best_test_loss, avg_test_loss)

    # Print loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f},               Test Loss: {avg_test_loss:.4f}")
        print(f"   Overall Best Train Loss: {best_train_loss:.4f}, Overall Best Test Loss: {best_test_loss:.4f}")

Epoch [10/100], Train Loss: 2.7402,               Test Loss: 2.6440
   Overall Best Train Loss: 1.5309, Overall Best Test Loss: 2.5022
Epoch [20/100], Train Loss: 2.5493,               Test Loss: 2.5562
   Overall Best Train Loss: 1.5309, Overall Best Test Loss: 2.5022
Epoch [30/100], Train Loss: 2.3534,               Test Loss: 2.5172
   Overall Best Train Loss: 1.5309, Overall Best Test Loss: 2.5022
Epoch [40/100], Train Loss: 2.1796,               Test Loss: 2.5064
   Overall Best Train Loss: 1.5309, Overall Best Test Loss: 2.4997
Epoch [50/100], Train Loss: 2.0196,               Test Loss: 2.5335
   Overall Best Train Loss: 1.5309, Overall Best Test Loss: 2.4997
Epoch [60/100], Train Loss: 1.8512,               Test Loss: 2.5831
   Overall Best Train Loss: 1.5309, Overall Best Test Loss: 2.4997
Epoch [70/100], Train Loss: 1.6829,               Test Loss: 2.6534
   Overall Best Train Loss: 1.5309, Overall Best Test Loss: 2.4997
Epoch [80/100], Train Loss: 1.5163,               Test 